diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py index 82661c6c66..5bee0c9be0 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py @@ -35,6 +35,9 @@ new_output = { 'madevent_simd' : output.SIMD_ProcessExporter, 'madevent_gpu' : output.GPU_ProcessExporter, 'standalone_cudacpp' : output.PLUGIN_ProcessExporter, + # the following one are used for the second exporter class + # (not really needed so far but interesting if need + # specialization in the futur) 'standalone_simd' : output.SIMD_ProcessExporter, 'standalone_cuda' : output.GPU_ProcessExporter, } diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/counters.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc similarity index 100% rename from epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/counters.cc rename to epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/counters.cc diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/fbridge_common.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge_common.inc similarity index 100% rename from epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/fbridge_common.inc rename to epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge_common.inc diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/ompnumthreads.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/ompnumthreads.cc similarity index 100% rename from epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/ompnumthreads.cc rename to epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/ompnumthreads.cc diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index f2ed8897b3..a75a9dce64 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -110,6 +110,7 @@ class PLUGIN_ProcessExporter(PLUGIN_export_cpp.ProcessExporterGPU): s+'gpu/perf.py', s+'gpu/profile.sh', s+'CMake/SubProcesses/CMakeLists.txt'], 'test': [s+'gpu/cudacpp_test.mk']} + to_link_in_P = ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', @@ -196,13 +197,15 @@ def generate_subprocess_directory(self, subproc_group, fortran_model, me=None): misc.sprint(' type(subproc_group)=%s'%type(subproc_group)) # e.g. madgraph.core.helas_objects.HelasMatrixElement misc.sprint(' type(fortran_model)=%s'%type(fortran_model)) # e.g. madgraph.iolibs.helas_call_writers.GPUFOHelasCallWriter misc.sprint(' type(me)=%s me=%s'%(type(me) if me is not None else None, me)) # e.g. int - return super().generate_subprocess_directory(subproc_group, fortran_model, me) - + misc.sprint("need to link", self.to_link_in_P) + out = super().generate_subprocess_directory(subproc_group, fortran_model, me) + return out # AV (default from OM's tutorial) - add a debug printout def convert_model(self, model, wanted_lorentz=[], wanted_coupling=[]): misc.sprint('Entering PLUGIN_ProcessExporter.convert_model (create the model)') return super().convert_model(model, wanted_lorentz, wanted_coupling) + # AV (default from OM's tutorial) - add a debug printout def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): """Typically creating jpeg/HTML output/ compilation/... @@ -281,7 +284,22 @@ def add_madevent_plugin_fct(self): #------------------------------------------------------------------------------------ -class SIMD_ProcessExporter(PLUGIN_ProcessExporter): +class PLUGIN_ProcessExporter_MadEvent(PLUGIN_ProcessExporter): + """ a class to include all tweak related to madevent and not related to standalone. + in practise this class is never called but only the SIMD or GPU related class""" + + s = PLUGINDIR + '/madgraph/iolibs/template_files/' + # add template file/ linking only needed in the madevent mode and not in standalone + from_template = dict(PLUGIN_ProcessExporter.from_template) + from_template['SubProcesses'] = from_template['SubProcesses'] + [s+'gpu/fbridge_common.inc', + s+'gpu/counters.cc', + s+'gpu/ompnumthreads.cc'] + + to_link_in_P = PLUGIN_ProcessExporter.to_link_in_P + ['fbridge_common.inc', 'counters.cc','ompnumthreads.cc'] + +#------------------------------------------------------------------------------------ + +class SIMD_ProcessExporter(PLUGIN_ProcessExporter_MadEvent): def change_output_args(args, cmd): """ """ cmd._export_format = "madevent" @@ -293,7 +311,7 @@ def change_output_args(args, cmd): #------------------------------------------------------------------------------------ -class GPU_ProcessExporter(PLUGIN_ProcessExporter): +class GPU_ProcessExporter(PLUGIN_ProcessExporter_MadEvent): def change_output_args(args, cmd): """ """ cmd._export_format = "madevent" diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh index 7edafba599..8739cff3ea 100755 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/patchMad.sh @@ -46,7 +46,7 @@ if [ "${patchlevel}" == "0" ]; then exit $status; fi # Patch the default Fortran code to provide the integration with the cudacpp plugin # (1) Process-independent patches touch ${dir}/Events/.keep # this file should already be present (mg5amcnlo copies it from Template/LO/Events/.keep) -\cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/fbridge_common.inc ${dir}/SubProcesses # new file +#\cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/fbridge_common.inc ${dir}/SubProcesses # new file if [ "${patchlevel}" == "2" ]; then cd ${dir} echo "DEBUG: cd ${PWD}; patch -p4 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.common" @@ -57,13 +57,9 @@ if [ "${patchlevel}" == "2" ]; then fi for p1dir in ${dir}/SubProcesses/P*; do cd $p1dir - ln -sf ../fbridge_common.inc . # new file - cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/counters.cc . # new file - cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/ompnumthreads.cc . # new file - ###cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/counters.cc ${dir}/SubProcesses/ # new file (SH) - ###cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/ompnumthreads.cc ${dir}/SubProcesses/ # new file (SH) - ###ln -sf ../counters.cc . # new file (SH) - ###ln -sf ../ompnumthreads.cc . # new file (SH) + #ln -sf ../fbridge_common.inc . # new file + #cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/counters.cc . # new file + #cp -pr ${scrdir}/MG5aMC_patches/${dir_patches}/ompnumthreads.cc . # new file if [ "${patchlevel}" == "2" ]; then echo "DEBUG: cd ${PWD}; patch -p6 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.P1" if ! patch -p6 -i ${scrdir}/MG5aMC_patches/${dir_patches}/patch.P1; then status=1; fi diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh index 72b6687dd0..c9e85c1b91 100755 --- a/epochX/cudacpp/CODEGEN/generateAndCompare.sh +++ b/epochX/cudacpp/CODEGEN/generateAndCompare.sh @@ -232,9 +232,9 @@ function codeGenAndDiff() elif [ "${OUTBCK}" == "madonly" ]; then # $SCRBCK=cudacpp and $OUTBCK=madonly echo "output madevent ${outproc} ${helrecopt} --vector_size=${vecsize}" >> ${outproc}.mg elif [ "${OUTBCK}" == "mad" ]; then # $SCRBCK=cudacpp and $OUTBCK=mad - echo "output madevent ${outproc} ${helrecopt} --vector_size=${vecsize} --me_exporter=standalone_cudacpp" >> ${outproc}.mg + echo "output madevent_simd ${outproc} ${helrecopt} --vector_size=${vecsize} " >> ${outproc}.mg elif [ "${OUTBCK}" == "madcpp" ]; then # $SCRBCK=cudacpp and $OUTBCK=madcpp - echo "output madevent ${outproc} ${helrecopt} --vector_size=32 --me_exporter=standalone_cpp" >> ${outproc}.mg + echo "output madevent_simd ${outproc} ${helrecopt} --vector_size=32" >> ${outproc}.mg elif [ "${OUTBCK}" == "madgpu" ]; then # $SCRBCK=cudacpp and $OUTBCK=madgpu echo "output madevent ${outproc} ${helrecopt} --vector_size=32 --me_exporter=standalone_gpu" >> ${outproc}.mg else # $SCRBCK=cudacpp and $OUTBCK=cudacpp, cpp or gpu diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 122537896c..c0d823893a 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -53,7 +53,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00548553466796875  +DEBUG: model prefixing takes 0.005708217620849609  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -156,27 +156,28 @@ INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams 1 processes with 2 diagrams generated in 0.004 s Total: 1 processes with 2 diagrams -output madevent ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. It has been validated for the last time with version: 3.5.2 +Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  +DEBUG: cformat =  standalone_simd [export_cpp.py at line 3071]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  INFO: initialize a new directory: CODEGEN_mad_ee_mumu INFO: remove old information in CODEGEN_mad_ee_mumu -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,19 +194,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.098 s +Wrote files for 8 helas calls in 0.101 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.198 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +ALOHA: aloha creates 3 routines in 0.207 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.260 s +ALOHA: aloha creates 7 routines in 0.275 s FFV1 FFV1 FFV2 @@ -214,28 +215,27 @@ ALOHA: aloha creates 7 routines in 0.260 s FFV4 FFV2_4 FFV2_4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 496 (offset 12 lines). patching file driver.f @@ -243,16 +243,16 @@ patching file matrix1.f Hunk #3 succeeded at 230 (offset 9 lines). Hunk #4 succeeded at 267 (offset 18 lines). Hunk #5 succeeded at 312 (offset 18 lines). -DEBUG: p.returncode =  0 [output.py at line 238]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. +DEBUG: p.returncode =  0 [output.py at line 241]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README Run "open index.html" to see more information about this process. quit -real 0m1.925s -user 0m1.621s -sys 0m0.231s +real 0m1.910s +user 0m1.675s +sys 0m0.221s Code generation completed in 2 seconds ************************************************************ * * @@ -274,9 +274,9 @@ Code generation completed in 2 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -304,9 +304,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt b/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt index cdeedc7863..ce678812fe 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat index b49ff5e24a..9b246807bc 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat @@ -45,5 +45,5 @@ define l+ = e+ mu+ define l- = e- mu- define vl = ve vm vt define vl~ = ve~ vm~ vt~ -output madevent ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --\ -vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=Fal\ +se --vector_size=32 diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/ompnumthreads.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/ompnumthreads.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/counters.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/ee_mumu.mad/mg5.in b/epochX/cudacpp/ee_mumu.mad/mg5.in index 4e83015b40..ce5c0456e0 100644 --- a/epochX/cudacpp/ee_mumu.mad/mg5.in +++ b/epochX/cudacpp/ee_mumu.mad/mg5.in @@ -1,4 +1,4 @@ set stdout_level DEBUG set zerowidth_tchannel F generate e+ e- > mu+ mu- -output madevent ee_mumu.mad --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ee_mumu.mad --hel_recycling=False --vector_size=32 diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 0ca2931a2d..71b04c8320 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -53,7 +53,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005585908889770508  +DEBUG: model prefixing takes 0.005678653717041016  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -162,28 +162,29 @@ Load PLUGIN.CUDACPP_OUTPUT It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 195]  -DEBUG: type(subproc_group)= [output.py at line 196]  -DEBUG: type(fortran_model)= [output.py at line 197]  -DEBUG: type(me)= me=0 [output.py at line 198]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 196]  +DEBUG: type(subproc_group)= [output.py at line 197]  +DEBUG: type(fortran_model)= [output.py at line 198]  +DEBUG: type(me)= me=0 [output.py at line 199]  +DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 200]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.267 s +ALOHA: aloha creates 4 routines in 0.272 s FFV1 FFV1 FFV2 @@ -192,17 +193,17 @@ ALOHA: aloha creates 4 routines in 0.267 s FFV4 FFV2_4 FFV2_4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. quit -real 0m0.705s -user 0m0.594s -sys 0m0.053s -Code generation completed in 1 seconds +real 0m0.668s +user 0m0.613s +sys 0m0.048s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 465f0fdf8e..1fc03e0c34 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -53,7 +53,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005699872970581055  +DEBUG: model prefixing takes 0.005725383758544922  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -157,27 +157,28 @@ INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams 1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams -output madevent ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. It has been validated for the last time with version: 3.5.2 +Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  +DEBUG: cformat =  standalone_simd [export_cpp.py at line 3071]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  INFO: initialize a new directory: CODEGEN_mad_gg_tt INFO: remove old information in CODEGEN_mad_gg_tt -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -193,56 +194,55 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.099 s +Wrote files for 10 helas calls in 0.103 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.146 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +ALOHA: aloha creates 2 routines in 0.147 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.132 s +ALOHA: aloha creates 4 routines in 0.135 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f -DEBUG: p.returncode =  0 [output.py at line 238]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. +DEBUG: p.returncode =  0 [output.py at line 241]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README Run "open index.html" to see more information about this process. quit -real 0m1.712s -user 0m1.461s -sys 0m0.230s -Code generation completed in 1 seconds +real 0m1.715s +user 0m1.502s +sys 0m0.214s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * @@ -263,9 +263,9 @@ Code generation completed in 1 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -293,9 +293,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt index cdeedc7863..ce678812fe 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat index 9973b6a3db..90d6b27048 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat @@ -45,5 +45,5 @@ define l+ = e+ mu+ define l- = e- mu- define vl = ve vm vt define vl~ = ve~ vm~ vt~ -output madevent ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --ve\ -ctor_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False\ + --vector_size=32 diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/gg_tt.mad/mg5.in b/epochX/cudacpp/gg_tt.mad/mg5.in index b4b356fc51..95b259f47e 100644 --- a/epochX/cudacpp/gg_tt.mad/mg5.in +++ b/epochX/cudacpp/gg_tt.mad/mg5.in @@ -1,4 +1,4 @@ set stdout_level DEBUG set zerowidth_tchannel F generate g g > t t~ -output madevent gg_tt.mad --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd gg_tt.mad --hel_recycling=False --vector_size=32 diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 04ee1fae0a..96a207eb00 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -53,7 +53,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005332469940185547  +DEBUG: model prefixing takes 0.005444765090942383  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -163,41 +163,42 @@ Load PLUGIN.CUDACPP_OUTPUT It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 195]  -DEBUG: type(subproc_group)= [output.py at line 196]  -DEBUG: type(fortran_model)= [output.py at line 197]  -DEBUG: type(me)= me=0 [output.py at line 198]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 196]  +DEBUG: type(subproc_group)= [output.py at line 197]  +DEBUG: type(fortran_model)= [output.py at line 198]  +DEBUG: type(me)= me=0 [output.py at line 199]  +DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 200]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.146 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. quit -real 0m0.640s -user 0m0.463s -sys 0m0.061s -Code generation completed in 0 seconds +real 0m0.539s +user 0m0.476s +sys 0m0.058s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 86aeb0137d..99081f6854 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -53,7 +53,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005301952362060547  +DEBUG: model prefixing takes 0.005807399749755859  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -163,23 +163,24 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.019 s +1 processes with 16 diagrams generated in 0.020 s Total: 2 processes with 19 diagrams -output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. It has been validated for the last time with version: 3.5.2 +Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  +DEBUG: cformat =  standalone_simd [export_cpp.py at line 3071]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  INFO: initialize a new directory: CODEGEN_mad_gg_tt01g INFO: remove old information in CODEGEN_mad_gg_tt01g -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @2 INFO: Processing color information for process: g g > t t~ g @2 @@ -187,7 +188,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -204,7 +205,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -219,23 +220,23 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s -Wrote files for 46 helas calls in 0.250 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.043 s +Wrote files for 46 helas calls in 0.247 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.323 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +ALOHA: aloha creates 5 routines in 0.334 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.307 s +ALOHA: aloha creates 10 routines in 0.320 s VVV1 VVV1 FFV1 @@ -245,32 +246,31 @@ ALOHA: aloha creates 10 routines in 0.307 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f @@ -278,16 +278,16 @@ Hunk #2 succeeded at 159 (offset 16 lines). Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -DEBUG: p.returncode =  0 [output.py at line 238]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. +DEBUG: p.returncode =  0 [output.py at line 241]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README Run "open index.html" to see more information about this process. quit -real 0m2.306s -user 0m2.029s -sys 0m0.243s +real 0m2.337s +user 0m2.082s +sys 0m0.248s Code generation completed in 2 seconds ************************************************************ * * @@ -309,9 +309,9 @@ Code generation completed in 2 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -339,9 +339,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt index cdeedc7863..ce678812fe 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat index 2fd6fdb2bf..1b2fc5f0b6 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat @@ -46,5 +46,5 @@ define l- = e- mu- define vl = ve vm vt define vl~ = ve~ vm~ vt~ add process g g > t t~ g -output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False -\ --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=Fa\ +lse --vector_size=32 diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/ompnumthreads.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/ompnumthreads.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/gg_tt01g.mad/mg5.in b/epochX/cudacpp/gg_tt01g.mad/mg5.in index 95984fcf10..f297253b90 100644 --- a/epochX/cudacpp/gg_tt01g.mad/mg5.in +++ b/epochX/cudacpp/gg_tt01g.mad/mg5.in @@ -2,4 +2,4 @@ set stdout_level DEBUG set zerowidth_tchannel F generate g g > t t~ add process g g > t t~ g -output madevent gg_tt01g.mad --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd gg_tt01g.mad --hel_recycling=False --vector_size=32 diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 1d520f7648..fa7fec111b 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -53,7 +53,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0055522918701171875  +DEBUG: model prefixing takes 0.005398988723754883  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,29 +155,30 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams -output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. It has been validated for the last time with version: 3.5.2 +Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  +DEBUG: cformat =  standalone_simd [export_cpp.py at line 3071]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  INFO: initialize a new directory: CODEGEN_mad_gg_ttg INFO: remove old information in CODEGEN_mad_gg_ttg -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,23 +193,23 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.148 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s +Wrote files for 36 helas calls in 0.152 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.327 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +ALOHA: aloha creates 5 routines in 0.338 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.315 s +ALOHA: aloha creates 10 routines in 0.332 s VVV1 VVV1 FFV1 @@ -218,28 +219,27 @@ ALOHA: aloha creates 10 routines in 0.315 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f @@ -247,16 +247,16 @@ Hunk #2 succeeded at 159 (offset 16 lines). Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -DEBUG: p.returncode =  0 [output.py at line 238]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. +DEBUG: p.returncode =  0 [output.py at line 241]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README Run "open index.html" to see more information about this process. quit -real 0m2.204s -user 0m1.932s -sys 0m0.252s +real 0m2.239s +user 0m2.005s +sys 0m0.234s Code generation completed in 2 seconds ************************************************************ * * @@ -278,9 +278,9 @@ Code generation completed in 2 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -308,9 +308,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt index cdeedc7863..ce678812fe 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat index bc26df64f3..72d7a0efd4 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat @@ -45,5 +45,5 @@ define l+ = e+ mu+ define l- = e- mu- define vl = ve vm vt define vl~ = ve~ vm~ vt~ -output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --v\ -ector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=Fals\ +e --vector_size=32 diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/gg_ttg.mad/mg5.in b/epochX/cudacpp/gg_ttg.mad/mg5.in index e37d10d865..f4b43bcd8d 100644 --- a/epochX/cudacpp/gg_ttg.mad/mg5.in +++ b/epochX/cudacpp/gg_ttg.mad/mg5.in @@ -1,4 +1,4 @@ set stdout_level DEBUG set zerowidth_tchannel F generate g g > t t~ g -output madevent gg_ttg.mad --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd gg_ttg.mad --hel_recycling=False --vector_size=32 diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 26d60e142d..ecf3e1d46a 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -53,7 +53,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0054738521575927734  +DEBUG: model prefixing takes 0.005639791488647461  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -163,29 +163,30 @@ Load PLUGIN.CUDACPP_OUTPUT It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 195]  -DEBUG: type(subproc_group)= [output.py at line 196]  -DEBUG: type(fortran_model)= [output.py at line 197]  -DEBUG: type(me)= me=0 [output.py at line 198]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 196]  +DEBUG: type(subproc_group)= [output.py at line 197]  +DEBUG: type(fortran_model)= [output.py at line 198]  +DEBUG: type(me)= me=0 [output.py at line 199]  +DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 200]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. +Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.323 s +ALOHA: aloha creates 5 routines in 0.332 s VVV1 VVV1 FFV1 @@ -195,17 +196,17 @@ ALOHA: aloha creates 5 routines in 0.323 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. quit -real 0m0.778s -user 0m0.718s -sys 0m0.051s -Code generation completed in 1 seconds +real 0m0.790s +user 0m0.736s +sys 0m0.050s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 8b11d9e97b..11131eaf14 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -53,7 +53,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005313873291015625  +DEBUG: model prefixing takes 0.005640506744384766  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,29 +155,30 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.157 s +1 processes with 123 diagrams generated in 0.160 s Total: 1 processes with 123 diagrams -output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. It has been validated for the last time with version: 3.5.2 +Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  +DEBUG: cformat =  standalone_simd [export_cpp.py at line 3071]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  INFO: initialize a new directory: CODEGEN_mad_gg_ttgg INFO: remove old information in CODEGEN_mad_gg_ttgg -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -192,23 +193,23 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.423 s -Wrote files for 222 helas calls in 0.700 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.432 s +Wrote files for 222 helas calls in 0.704 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.327 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +ALOHA: aloha creates 5 routines in 0.337 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.310 s +ALOHA: aloha creates 10 routines in 0.322 s VVV1 VVV1 FFV1 @@ -221,28 +222,27 @@ ALOHA: aloha creates 10 routines in 0.310 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f @@ -250,16 +250,16 @@ Hunk #2 succeeded at 191 (offset 48 lines). Hunk #3 succeeded at 269 (offset 48 lines). Hunk #4 succeeded at 297 (offset 48 lines). Hunk #5 succeeded at 342 (offset 48 lines). -DEBUG: p.returncode =  0 [output.py at line 238]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. +DEBUG: p.returncode =  0 [output.py at line 241]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README Run "open index.html" to see more information about this process. quit -real 0m3.327s -user 0m3.009s -sys 0m0.283s +real 0m3.333s +user 0m3.080s +sys 0m0.236s Code generation completed in 3 seconds ************************************************************ * * @@ -281,9 +281,9 @@ Code generation completed in 3 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -311,9 +311,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt index cdeedc7863..ce678812fe 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat index b9a294d7a3..f4efb79920 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat @@ -45,5 +45,5 @@ define l+ = e+ mu+ define l- = e- mu- define vl = ve vm vt define vl~ = ve~ vm~ vt~ -output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --\ -vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=Fal\ +se --vector_size=32 diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/ompnumthreads.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/ompnumthreads.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/gg_ttgg.mad/mg5.in b/epochX/cudacpp/gg_ttgg.mad/mg5.in index 53784bf161..05b3fbcbac 100644 --- a/epochX/cudacpp/gg_ttgg.mad/mg5.in +++ b/epochX/cudacpp/gg_ttgg.mad/mg5.in @@ -1,4 +1,4 @@ set stdout_level DEBUG set zerowidth_tchannel F generate g g > t t~ g g -output madevent gg_ttgg.mad --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd gg_ttgg.mad --hel_recycling=False --vector_size=32 diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 8a72b5a0f4..38a3c3a518 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -53,7 +53,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057239532470703125  +DEBUG: model prefixing takes 0.00548243522644043  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.157 s +1 processes with 123 diagrams generated in 0.162 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -163,29 +163,30 @@ Load PLUGIN.CUDACPP_OUTPUT It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 195]  -DEBUG: type(subproc_group)= [output.py at line 196]  -DEBUG: type(fortran_model)= [output.py at line 197]  -DEBUG: type(me)= me=0 [output.py at line 198]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.421 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 196]  +DEBUG: type(subproc_group)= [output.py at line 197]  +DEBUG: type(fortran_model)= [output.py at line 198]  +DEBUG: type(me)= me=0 [output.py at line 199]  +DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 200]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. +Generated helas calls for 1 subprocesses (123 diagrams) in 0.435 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.320 s +ALOHA: aloha creates 5 routines in 0.326 s VVV1 VVV1 FFV1 @@ -198,17 +199,17 @@ ALOHA: aloha creates 5 routines in 0.320 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. quit -real 0m1.486s -user 0m1.373s -sys 0m0.056s +real 0m1.468s +user 0m1.398s +sys 0m0.062s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index df59413576..e8b21a0952 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -53,7 +53,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0055010318756103516  +DEBUG: model prefixing takes 0.005578756332397461  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,23 +155,24 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.859 s +1 processes with 1240 diagrams generated in 1.919 s Total: 1 processes with 1240 diagrams -output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. It has been validated for the last time with version: 3.5.2 +Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  +DEBUG: cformat =  standalone_simd [export_cpp.py at line 3071]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  INFO: initialize a new directory: CODEGEN_mad_gg_ttggg INFO: remove old information in CODEGEN_mad_gg_ttggg -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 @@ -179,7 +180,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,23 +195,23 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.577 s -Wrote files for 2281 helas calls in 18.096 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.694 s +Wrote files for 2281 helas calls in 18.830 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.313 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +ALOHA: aloha creates 5 routines in 0.326 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.305 s +ALOHA: aloha creates 10 routines in 0.320 s VVV1 VVV1 FFV1 @@ -223,28 +224,27 @@ ALOHA: aloha creates 10 routines in 0.305 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f @@ -252,17 +252,17 @@ Hunk #2 succeeded at 255 (offset 112 lines). Hunk #3 succeeded at 333 (offset 112 lines). Hunk #4 succeeded at 361 (offset 112 lines). Hunk #5 succeeded at 406 (offset 112 lines). -DEBUG: p.returncode =  0 [output.py at line 238]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. +DEBUG: p.returncode =  0 [output.py at line 241]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README Run "open index.html" to see more information about this process. quit -real 0m28.769s -user 0m28.239s -sys 0m0.414s -Code generation completed in 29 seconds +real 0m30.669s +user 0m29.221s +sys 0m0.403s +Code generation completed in 31 seconds ************************************************************ * * * W E L C O M E to * @@ -283,9 +283,9 @@ Code generation completed in 29 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -313,9 +313,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt index cdeedc7863..ce678812fe 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat index 2e2a09aef7..c16335faca 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat @@ -45,5 +45,5 @@ define l+ = e+ mu+ define l- = e- mu- define vl = ve vm vt define vl~ = ve~ vm~ vt~ -output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False -\ --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=Fa\ +lse --vector_size=32 diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/ompnumthreads.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/ompnumthreads.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/gg_ttggg.mad/mg5.in b/epochX/cudacpp/gg_ttggg.mad/mg5.in index f92d17d219..4865da91cd 100644 --- a/epochX/cudacpp/gg_ttggg.mad/mg5.in +++ b/epochX/cudacpp/gg_ttggg.mad/mg5.in @@ -1,4 +1,4 @@ set stdout_level DEBUG set zerowidth_tchannel F generate g g > t t~ g g g -output madevent gg_ttggg.mad --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd gg_ttggg.mad --hel_recycling=False --vector_size=32 diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index faec804f1b..6d60b544b0 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -53,7 +53,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005358695983886719  +DEBUG: model prefixing takes 0.005746364593505859  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.872 s +1 processes with 1240 diagrams generated in 1.934 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -163,29 +163,30 @@ Load PLUGIN.CUDACPP_OUTPUT It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 195]  -DEBUG: type(subproc_group)= [output.py at line 196]  -DEBUG: type(fortran_model)= [output.py at line 197]  -DEBUG: type(me)= me=0 [output.py at line 198]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.517 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 196]  +DEBUG: type(subproc_group)= [output.py at line 197]  +DEBUG: type(fortran_model)= [output.py at line 198]  +DEBUG: type(me)= me=0 [output.py at line 199]  +DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 200]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.709 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.346 s +ALOHA: aloha creates 5 routines in 0.354 s VVV1 VVV1 FFV1 @@ -198,17 +199,17 @@ ALOHA: aloha creates 5 routines in 0.346 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. quit -real 0m12.953s -user 0m12.719s -sys 0m0.135s +real 0m13.375s +user 0m13.203s +sys 0m0.119s Code generation completed in 13 seconds diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index e72f8836f6..7015773962 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -53,7 +53,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005477190017700195  +DEBUG: model prefixing takes 0.005769014358520508  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,23 +170,24 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.079 s Total: 8 processes with 40 diagrams -output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. It has been validated for the last time with version: 3.5.2 +Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  +DEBUG: cformat =  standalone_simd [export_cpp.py at line 3071]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  INFO: initialize a new directory: CODEGEN_mad_gq_ttq INFO: remove old information in CODEGEN_mad_gq_ttq -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 INFO: Processing color information for process: g u > t t~ u @1 @@ -200,7 +201,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -217,7 +218,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -233,43 +234,42 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.217 s +Wrote files for 32 helas calls in 0.223 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.143 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +ALOHA: aloha creates 2 routines in 0.147 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.130 s +ALOHA: aloha creates 4 routines in 0.135 s FFV1 FFV1 FFV1 FFV1 VVV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 528 (offset 44 lines). patching file driver.f @@ -279,7 +279,7 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 247 (offset 26 lines). Hunk #4 succeeded at 281 (offset 32 lines). Hunk #5 succeeded at 326 (offset 32 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 528 (offset 44 lines). patching file driver.f @@ -289,17 +289,17 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 247 (offset 26 lines). Hunk #4 succeeded at 281 (offset 32 lines). Hunk #5 succeeded at 326 (offset 32 lines). -DEBUG: p.returncode =  0 [output.py at line 238]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. +DEBUG: p.returncode =  0 [output.py at line 241]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README Run "open index.html" to see more information about this process. quit -real 0m3.073s -user 0m1.692s -sys 0m0.237s -Code generation completed in 3 seconds +real 0m1.956s +user 0m1.706s +sys 0m0.245s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * @@ -320,9 +320,9 @@ Code generation completed in 3 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -350,9 +350,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt index cdeedc7863..ce678812fe 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat index 5ab65926b2..deab56cf41 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat @@ -47,5 +47,5 @@ define vl = ve vm vt define vl~ = ve~ vm~ vt~ define q = u c d s u~ c~ d~ s~ generate g q > t t~ q -output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --v\ -ector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=Fals\ +e --vector_size=32 diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/gq_ttq.mad/mg5.in b/epochX/cudacpp/gq_ttq.mad/mg5.in index 2273ae9cfd..f02829a969 100644 --- a/epochX/cudacpp/gq_ttq.mad/mg5.in +++ b/epochX/cudacpp/gq_ttq.mad/mg5.in @@ -2,4 +2,4 @@ set stdout_level DEBUG set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ generate g q > t t~ q -output madevent gq_ttq.mad --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd gq_ttq.mad --hel_recycling=False --vector_size=32 diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 3957e3a7d6..8ed0b3a1c7 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -53,7 +53,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00551295280456543  +DEBUG: model prefixing takes 0.005808115005493164  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.077 s +8 processes with 40 diagrams generated in 0.084 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -178,9 +178,9 @@ Load PLUGIN.CUDACPP_OUTPUT It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 INFO: Processing color information for process: g u > t t~ u @1 @@ -192,44 +192,46 @@ INFO: Processing color information for process: g u~ > t t~ u~ @1 INFO: Combined process g c~ > t t~ c~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 195]  -DEBUG: type(subproc_group)= [output.py at line 196]  -DEBUG: type(fortran_model)= [output.py at line 197]  -DEBUG: type(me)= me=0 [output.py at line 198]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 195]  -DEBUG: type(subproc_group)= [output.py at line 196]  -DEBUG: type(fortran_model)= [output.py at line 197]  -DEBUG: type(me)= me=1 [output.py at line 198]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 196]  +DEBUG: type(subproc_group)= [output.py at line 197]  +DEBUG: type(fortran_model)= [output.py at line 198]  +DEBUG: type(me)= me=0 [output.py at line 199]  +DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 200]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 196]  +DEBUG: type(subproc_group)= [output.py at line 197]  +DEBUG: type(fortran_model)= [output.py at line 198]  +DEBUG: type(me)= me=1 [output.py at line 199]  +DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 200]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. +Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.203 s FFV1 FFV1 FFV1 FFV1 VVV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. quit -real 0m1.009s -user 0m0.580s -sys 0m0.067s +real 0m1.010s +user 0m0.617s +sys 0m0.055s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 9d97c918db..ef394b2a87 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -53,15 +53,21 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T save options auto_convert_model -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model heft +INFO: reload from .py file +INFO: load particles +INFO: load vertices +WARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  +WARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  +DEBUG: model prefixing takes 0.005836963653564453  INFO: Restrict model heft with file models/heft/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -137,37 +143,38 @@ Load PLUGIN.CUDACPP_OUTPUT It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  plugin [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > h HIG<=1 HIW<=1 WEIGHTED<=2 @1 INFO: Processing color information for process: g g > h HIG<=1 HIW<=1 @1 -DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 195]  -DEBUG: type(subproc_group)= [output.py at line 196]  -DEBUG: type(fortran_model)= [output.py at line 197]  -DEBUG: type(me)= me=0 [output.py at line 198]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/. +DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 196]  +DEBUG: type(subproc_group)= [output.py at line 197]  +DEBUG: type(fortran_model)= [output.py at line 198]  +DEBUG: type(me)= me=0 [output.py at line 199]  +DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 200]  +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/SubProcesses/P1_Sigma_heft_gg_h/. Generated helas calls for 1 subprocesses (1 diagrams) in 0.002 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines -ALOHA: aloha creates 1 routines in 0.061 s +ALOHA: aloha creates 1 routines in 0.063 s VVS3 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h -INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./HelAmps_heft.h +INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/./Parameters_heft.cc INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_h/src/. quit -real 0m0.423s -user 0m0.361s -sys 0m0.053s +real 0m0.451s +user 0m0.390s +sys 0m0.052s Code generation completed in 0 seconds diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 4b983ad8d3..fa869aa432 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -53,7 +53,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005466461181640625  +DEBUG: model prefixing takes 0.00567317008972168  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +172,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.029 s +5 processes with 7 diagrams generated in 0.030 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.135 s +13 processes with 76 diagrams generated in 0.141 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,23 +378,24 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.826 s +65 processes with 1119 diagrams generated in 1.880 s Total: 83 processes with 1202 diagrams -output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. It has been validated for the last time with version: 3.5.2 +Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  +DEBUG: cformat =  standalone_simd [export_cpp.py at line 3071]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 162]  INFO: initialize a new directory: CODEGEN_mad_pp_tt012j INFO: remove old information in CODEGEN_mad_pp_tt012j -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 167]  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Processing color information for process: g g > t t~ g g @2 @@ -499,7 +500,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -516,7 +517,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -533,7 +534,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -550,7 +551,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -567,7 +568,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -584,7 +585,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -601,7 +602,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -618,7 +619,7 @@ INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -635,7 +636,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -652,7 +653,7 @@ INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -669,7 +670,7 @@ INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -686,7 +687,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -703,7 +704,7 @@ INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -720,7 +721,7 @@ INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -737,7 +738,7 @@ INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -754,7 +755,7 @@ INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -771,7 +772,7 @@ INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -788,7 +789,7 @@ INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1057]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -803,23 +804,23 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.280 s -Wrote files for 810 helas calls in 3.230 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.306 s +Wrote files for 810 helas calls in 3.324 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.333 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  +ALOHA: aloha creates 5 routines in 0.340 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 205]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.309 s +ALOHA: aloha creates 10 routines in 0.317 s VVV1 VVV1 FFV1 @@ -832,32 +833,31 @@ ALOHA: aloha creates 10 routines in 0.309 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py -Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 539 (offset 55 lines). patching file driver.f @@ -867,7 +867,7 @@ Hunk #2 succeeded at 146 (offset 3 lines). Hunk #3 succeeded at 224 (offset 3 lines). Hunk #4 succeeded at 252 (offset 3 lines). Hunk #5 succeeded at 297 (offset 3 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f @@ -875,7 +875,7 @@ Hunk #2 succeeded at 159 (offset 16 lines). Hunk #3 succeeded at 237 (offset 16 lines). Hunk #4 succeeded at 265 (offset 16 lines). Hunk #5 succeeded at 310 (offset 16 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 528 (offset 44 lines). patching file driver.f @@ -885,7 +885,7 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 528 (offset 44 lines). patching file driver.f @@ -895,7 +895,7 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 539 (offset 55 lines). patching file driver.f @@ -905,7 +905,7 @@ Hunk #2 succeeded at 162 (offset 19 lines). Hunk #3 succeeded at 240 (offset 19 lines). Hunk #4 succeeded at 268 (offset 19 lines). Hunk #5 succeeded at 313 (offset 19 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f patching file driver.f patching file matrix1.f @@ -913,7 +913,7 @@ Hunk #2 succeeded at 191 (offset 48 lines). Hunk #3 succeeded at 269 (offset 48 lines). Hunk #4 succeeded at 297 (offset 48 lines). Hunk #5 succeeded at 342 (offset 48 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 517 (offset 33 lines). patching file driver.f @@ -923,7 +923,7 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 528 (offset 44 lines). patching file driver.f @@ -933,7 +933,7 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 528 (offset 44 lines). patching file driver.f @@ -943,7 +943,7 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 555 (offset 71 lines). patching file driver.f @@ -953,7 +953,7 @@ Hunk #2 succeeded at 196 (offset 53 lines). Hunk #3 succeeded at 274 (offset 53 lines). Hunk #4 succeeded at 302 (offset 53 lines). Hunk #5 succeeded at 347 (offset 53 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 627 (offset 143 lines). patching file driver.f @@ -963,7 +963,7 @@ Hunk #2 succeeded at 202 (offset 59 lines). Hunk #3 succeeded at 280 (offset 59 lines). Hunk #4 succeeded at 308 (offset 59 lines). Hunk #5 succeeded at 353 (offset 59 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 539 (offset 55 lines). patching file driver.f @@ -973,7 +973,7 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 627 (offset 143 lines). patching file driver.f @@ -983,7 +983,7 @@ Hunk #2 succeeded at 202 (offset 59 lines). Hunk #3 succeeded at 280 (offset 59 lines). Hunk #4 succeeded at 308 (offset 59 lines). Hunk #5 succeeded at 353 (offset 59 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 539 (offset 55 lines). patching file driver.f @@ -993,7 +993,7 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 539 (offset 55 lines). patching file driver.f @@ -1003,7 +1003,7 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 555 (offset 71 lines). patching file driver.f @@ -1013,7 +1013,7 @@ Hunk #2 succeeded at 196 (offset 53 lines). Hunk #3 succeeded at 274 (offset 53 lines). Hunk #4 succeeded at 302 (offset 53 lines). Hunk #5 succeeded at 347 (offset 53 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f Hunk #1 succeeded at 539 (offset 55 lines). patching file driver.f @@ -1023,17 +1023,17 @@ Hunk #2 succeeded at 194 (offset 51 lines). Hunk #3 succeeded at 272 (offset 51 lines). Hunk #4 succeeded at 300 (offset 51 lines). Hunk #5 succeeded at 345 (offset 51 lines). -DEBUG: p.returncode =  0 [output.py at line 238]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. +DEBUG: p.returncode =  0 [output.py at line 241]  +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README Run "open index.html" to see more information about this process. quit -real 0m8.828s -user 0m8.276s -sys 0m0.507s -Code generation completed in 9 seconds +real 0m10.739s +user 0m8.492s +sys 0m0.454s +Code generation completed in 11 seconds ************************************************************ * * * W E L C O M E to * @@ -1054,9 +1054,9 @@ Code generation completed in 9 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -1084,9 +1084,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt b/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt index cdeedc7863..ce678812fe 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt @@ -234,7 +234,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat index 8af6ddf9c1..2f22f719bc 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat @@ -49,5 +49,5 @@ define j = p generate p p > t t~ @0 add process p p > t t~ j @1 add process p p > t t~ j j @2 -output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False \ ---vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=F\ +alse --vector_size=32 diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc deleted file mode 100644 index 3bbdec9387..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include "timer.h" -#define TIMERTYPE std::chrono::high_resolution_clock - -#include -#include - -// NB1: The C functions counters_xxx_ in this file are called by Fortran code -// Hence the trailing "_": 'call counters_end()' links to counters_end_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -extern "C" -{ - // Now: fortran=-1, cudacpp=0 - // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... - constexpr unsigned int nimplC = 2; - constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } - const char* iimplC2TXT( int iimplC ) - { - const int iimplF = iimplC - 1; - switch( iimplF ) - { - case -1: return "Fortran"; break; - case +0: return "CudaCpp"; break; - default: assert( false ); break; - } - } - - static mgOnGpu::Timer program_timer; - static float program_totaltime = 0; - static mgOnGpu::Timer smatrix1_timer; - static float smatrix1_totaltime = 0; - static mgOnGpu::Timer smatrix1multi_timer[nimplC]; - static float smatrix1multi_totaltime[nimplC] = { 0 }; - static int smatrix1_counter = 0; - static int smatrix1multi_counter[nimplC] = { 0 }; - - void counters_initialise_() - { - program_timer.Start(); - return; - } - - void counters_smatrix1_start_() - { - smatrix1_counter++; - smatrix1_timer.Start(); - return; - } - - void counters_smatrix1_stop_() - { - smatrix1_totaltime += smatrix1_timer.GetDuration(); - return; - } - - void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_counter[iimplC] += *pnevt; - smatrix1multi_timer[iimplC].Start(); - return; - } - - void counters_smatrix1multi_stop_( const int* iimplF ) - { - const unsigned int iimplC = iimplF2C( *iimplF ); - smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); - return; - } - - void counters_finalise_() - { - program_totaltime += program_timer.GetDuration(); - // Write to stdout - float overhead_totaltime = program_totaltime; - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; - printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); - printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); - for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) - if( smatrix1multi_counter[iimplC] > 0 ) - printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", - iimplC2TXT( iimplC ), - iimplC + 1, - smatrix1multi_totaltime[iimplC], - smatrix1multi_counter[iimplC], - smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); - return; - } -} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc new file mode 120000 index 0000000000..06e29b46f9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/counters.cc @@ -0,0 +1 @@ +../counters.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/ompnumthreads.cc deleted file mode 100644 index 1d004923b9..0000000000 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/ompnumthreads.cc +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. -// Licensed under the GNU Lesser General Public License (version 3 or later). -// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. - -#include - -// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code -// Hence the trailing "_": 'call xxx()' links to xxx_ -// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html - -// NB2: This file also contains C++ code and is built using g++ -// Hence use 'extern "C"' to avoid name mangling by the C++ compiler -// See https://www.geeksforgeeks.org/extern-c-in-c - -#ifdef _OPENMP -extern "C" -{ - void ompnumthreads_not_set_means_one_thread_() - { - const int debuglevel = 0; // quiet(-1), info(0), debug(1) - ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file - } -} -#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/ompnumthreads.cc new file mode 120000 index 0000000000..645dc78215 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/ompnumthreads.cc @@ -0,0 +1 @@ +../ompnumthreads.cc \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc new file mode 100644 index 0000000000..3bbdec9387 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/counters.cc @@ -0,0 +1,98 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include "timer.h" +#define TIMERTYPE std::chrono::high_resolution_clock + +#include +#include + +// NB1: The C functions counters_xxx_ in this file are called by Fortran code +// Hence the trailing "_": 'call counters_end()' links to counters_end_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +extern "C" +{ + // Now: fortran=-1, cudacpp=0 + // Eventually: fortran=-1, cuda=0, cpp/none=1, cpp/sse4=2, etc... + constexpr unsigned int nimplC = 2; + constexpr unsigned int iimplF2C( int iimplF ) { return iimplF + 1; } + const char* iimplC2TXT( int iimplC ) + { + const int iimplF = iimplC - 1; + switch( iimplF ) + { + case -1: return "Fortran"; break; + case +0: return "CudaCpp"; break; + default: assert( false ); break; + } + } + + static mgOnGpu::Timer program_timer; + static float program_totaltime = 0; + static mgOnGpu::Timer smatrix1_timer; + static float smatrix1_totaltime = 0; + static mgOnGpu::Timer smatrix1multi_timer[nimplC]; + static float smatrix1multi_totaltime[nimplC] = { 0 }; + static int smatrix1_counter = 0; + static int smatrix1multi_counter[nimplC] = { 0 }; + + void counters_initialise_() + { + program_timer.Start(); + return; + } + + void counters_smatrix1_start_() + { + smatrix1_counter++; + smatrix1_timer.Start(); + return; + } + + void counters_smatrix1_stop_() + { + smatrix1_totaltime += smatrix1_timer.GetDuration(); + return; + } + + void counters_smatrix1multi_start_( const int* iimplF, const int* pnevt ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_counter[iimplC] += *pnevt; + smatrix1multi_timer[iimplC].Start(); + return; + } + + void counters_smatrix1multi_stop_( const int* iimplF ) + { + const unsigned int iimplC = iimplF2C( *iimplF ); + smatrix1multi_totaltime[iimplC] += smatrix1multi_timer[iimplC].GetDuration(); + return; + } + + void counters_finalise_() + { + program_totaltime += program_timer.GetDuration(); + // Write to stdout + float overhead_totaltime = program_totaltime; + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) overhead_totaltime -= smatrix1multi_totaltime[iimplC]; + printf( " [COUNTERS] PROGRAM TOTAL : %9.4fs\n", program_totaltime ); + printf( " [COUNTERS] Fortran Overhead ( 0 ) : %9.4fs\n", overhead_totaltime ); + for( unsigned int iimplC = 0; iimplC < nimplC; iimplC++ ) + if( smatrix1multi_counter[iimplC] > 0 ) + printf( " [COUNTERS] %7s MEs ( %1d ) : %9.4fs for %8d events => throughput is %8.2E events/s\n", + iimplC2TXT( iimplC ), + iimplC + 1, + smatrix1multi_totaltime[iimplC], + smatrix1multi_counter[iimplC], + smatrix1multi_counter[iimplC] / smatrix1multi_totaltime[iimplC] ); + return; + } +} diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/ompnumthreads.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/ompnumthreads.cc new file mode 100644 index 0000000000..1d004923b9 --- /dev/null +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/ompnumthreads.cc @@ -0,0 +1,25 @@ +// Copyright (C) 2020-2023 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. Valassi (Dec 2022) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin. + +#include + +// NB1: The C function ompnumthreadsNotSetMeansOneThread_ is called by Fortran code +// Hence the trailing "_": 'call xxx()' links to xxx_ +// See http://www.yolinux.com/TUTORIALS/LinuxTutorialMixingFortranAndC.html + +// NB2: This file also contains C++ code and is built using g++ +// Hence use 'extern "C"' to avoid name mangling by the C++ compiler +// See https://www.geeksforgeeks.org/extern-c-in-c + +#ifdef _OPENMP +extern "C" +{ + void ompnumthreads_not_set_means_one_thread_() + { + const int debuglevel = 0; // quiet(-1), info(0), debug(1) + ompnumthreadsNotSetMeansOneThread( debuglevel ); // call the inline C++ function defined in the .h file + } +} +#endif diff --git a/epochX/cudacpp/pp_tt012j.mad/mg5.in b/epochX/cudacpp/pp_tt012j.mad/mg5.in index 6bc40e7968..c22e2d6100 100644 --- a/epochX/cudacpp/pp_tt012j.mad/mg5.in +++ b/epochX/cudacpp/pp_tt012j.mad/mg5.in @@ -4,4 +4,4 @@ define j = p generate p p > t t~ @0 add process p p > t t~ j @1 add process p p > t t~ j j @2 -output madevent pp_tt012j.mad --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp +output madevent_simd pp_tt012j.mad --hel_recycling=False --vector_size=32