@@ -57,7 +57,7 @@ HIP_DEBUG_FLAGS += --offload-arch=$(GPU_ARCH)
57
57
CXX_FLAGS = -std=c++17 -O3 -fopenmp

# Profiling flags
# NSYS_FLAGS is handed to `nsys profile` (Nsight Systems), which replaces the
# retired nvprof-based NVPROF_FLAGS.
#   --cuda-event-trace=false  one option word; must not be split at the hyphen
#   --force-overwrite=true    nsys takes this as a true/false option, so the
#                             value is spelled out to keep the following
#                             `-o <file>` from being consumed as its argument
NSYS_FLAGS = --cuda-event-trace=false --force-overwrite=true
ROCPROF_FLAGS = --hip-trace --stats --output-file %s.csv
62
62
63
63
# Directories
@@ -144,7 +144,7 @@ ifeq ($(BUILD_CUDA),1)
144
144
@for target in $(CUDA_TARGETS); do \
145
145
if [ -f $$target ]; then \
146
146
echo "Profiling $$target..."; \
147
- nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
147
+ nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
148
148
fi; \
149
149
done
150
150
endif
@@ -171,11 +171,12 @@ run: all
171
171
# Performance profiling targets
.PHONY: profile-cuda
# profile-cuda: run each built CUDA example under Nsight Systems (nsys).
# For every entry of $(CUDA_TARGETS) that exists as a regular file, this
# writes into $(PROFILE_DIR):
#   <name>.nsys-rep  - the report file requested through -o
#   <name>.nsys.log  - nsys's combined stdout/stderr
# Commands inside the loop are joined with ';', so a failing nsys run does
# not stop the remaining targets from being profiled.
profile-cuda: $(CUDA_TARGETS)
	@echo " Profiling CUDA examples with nsys..."
	@mkdir -p $(PROFILE_DIR)
	@for target in $(CUDA_TARGETS); do \
		if [ -f "$$target" ]; then \
			name=$$(basename "$$target"); \
			echo " Profiling $$target..."; \
			nsys profile $(NSYS_FLAGS) -o "$(PROFILE_DIR)/$${name}.nsys-rep" "$$target" > "$(PROFILE_DIR)/$${name}.nsys.log" 2>&1; \
		fi; \
	done
181
182
@@ -330,7 +331,7 @@ help:
330
331
@echo " validate - Validate optimization correctness"
331
332
@echo " "
332
333
@echo " Profiling Targets:"
333
- @echo " profile-cuda - Profile CUDA examples with nvprof "
334
+ @echo " profile-cuda - Profile CUDA examples with nsys "
334
335
@echo " profile-hip - Profile HIP examples with rocprof"
335
336
@echo " profile-detailed-cuda - Detailed profiling with Nsight Compute"
336
337
@echo " memcheck-cuda - Run CUDA memory checker"
0 commit comments