Skip to content

Commit 3813568

Browse files
authored
Merge pull request #7 from AIComputing101/coketaste/profile-cuda
Fix the Makefile for profile on cuda
2 parents a3c4572 + 1bfa312 commit 3813568

File tree

9 files changed

+19
-18
lines changed

9 files changed

+19
-18
lines changed

modules/module1/examples/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -206,7 +206,7 @@ ifeq ($(BUILD_CUDA),1)
206206
@for target in $(CUDA_TARGETS); do \
207207
if [ -f $$target ]; then \
208208
echo "Profiling $$target..."; \
209-
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
209+
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
210210
fi; \
211211
done
212212
endif

modules/module2/examples/Makefile

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -159,7 +159,7 @@ ifeq ($(BUILD_CUDA),1)
159159
@for target in $(CUDA_TARGETS); do \
160160
if [ -f $$target ]; then \
161161
echo "Profiling $$target..."; \
162-
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
162+
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
163163
fi; \
164164
done
165165
endif
@@ -260,8 +260,8 @@ profile_memory: cuda
260260
@echo " ncu --metrics l1tex__throughput.avg.pct_of_peak_sustained_elapsed ./02_memory_coalescing_cuda"
261261
@echo " ncu --metrics dram__bytes_read.sum,dram__bytes_write.sum ./05_memory_bandwidth_optimization_cuda"
262262
@echo ""
263-
@echo "Legacy nvprof (if available):"
264-
@echo " nvprof --metrics achieved_occupancy,gld_efficiency,gst_efficiency ./03_texture_memory_cuda"
263+
@echo "Modern NVIDIA Nsight Systems:"
264+
@echo " nsys profile --cuda-event-trace=false -o profile.nsys-rep ./03_texture_memory_cuda"
265265

266266

267267

modules/module3/examples/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -164,7 +164,7 @@ ifeq ($(BUILD_CUDA),1)
164164
@for target in $(CUDA_TARGETS); do \
165165
if [ -f $$target ]; then \
166166
echo "Profiling $$target..."; \
167-
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
167+
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
168168
fi; \
169169
done
170170
endif

modules/module4/examples/Makefile

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,7 @@ ifeq ($(BUILD_CUDA),1)
173173
@for target in $(CUDA_TARGETS); do \
174174
if [ -f $$target ]; then \
175175
echo "Profiling $$target..."; \
176-
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
176+
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
177177
fi; \
178178
done
179179
endif
@@ -419,9 +419,9 @@ profile_examples: all
419419
@echo " nsys profile --trace=cuda,nvtx --stats=true ./02_multi_gpu_programming"
420420
@echo " nsys profile --trace=cuda,nvtx,osrt --stats=true ./03_unified_memory"
421421
@echo ""
422-
@echo "Legacy nvprof:"
423-
@echo " nvprof --print-gpu-trace ./01_cuda_streams_basics"
424-
@echo " nvprof --print-api-trace ./02_multi_gpu_programming"
422+
@echo "Modern NVIDIA Nsight Systems:"
423+
@echo " nsys profile --cuda-event-trace=false -o trace.nsys-rep ./01_cuda_streams_basics"
424+
@echo " nsys profile --cuda-event-trace=false -o multi_gpu.nsys-rep ./02_multi_gpu_programming"
425425
@echo ""
426426
@echo "Multi-GPU Analysis:"
427427
@echo " nsys profile --trace=cuda,nvtx --stats=true -o multi_gpu_trace ./02_multi_gpu_programming"

modules/module5/examples/Makefile

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,7 @@ HIP_DEBUG_FLAGS += --offload-arch=$(GPU_ARCH)
5757
CXX_FLAGS = -std=c++17 -O3 -fopenmp
5858

5959
# Profiling flags
60-
NVPROF_FLAGS = --print-gpu-trace --log-file %s.nvprof
60+
NSYS_FLAGS = --cuda-event-trace=false --force-overwrite
6161
ROCPROF_FLAGS = --hip-trace --stats --output-file %s.csv
6262

6363
# Directories
@@ -144,7 +144,7 @@ ifeq ($(BUILD_CUDA),1)
144144
@for target in $(CUDA_TARGETS); do \
145145
if [ -f $$target ]; then \
146146
echo "Profiling $$target..."; \
147-
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
147+
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
148148
fi; \
149149
done
150150
endif
@@ -171,11 +171,12 @@ run: all
171171
# Performance profiling targets
172172
.PHONY: profile-cuda
173173
profile-cuda: $(CUDA_TARGETS)
174-
@echo "Profiling CUDA examples with nvprof..."
174+
@echo "Profiling CUDA examples with nsys..."
175+
@mkdir -p $(PROFILE_DIR)
175176
@for target in $(CUDA_TARGETS); do \
176177
if [ -f $$target ]; then \
177178
echo "Profiling $$target..."; \
178-
nvprof $(NVPROF_FLAGS) $$target > $(PROFILE_DIR)/$$(basename $$target).nvprof 2>&1; \
179+
nsys profile $(NSYS_FLAGS) -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target > $(PROFILE_DIR)/$$(basename $$target).nsys.log 2>&1; \
179180
fi; \
180181
done
181182

@@ -330,7 +331,7 @@ help:
330331
@echo " validate - Validate optimization correctness"
331332
@echo ""
332333
@echo "Profiling Targets:"
333-
@echo " profile-cuda - Profile CUDA examples with nvprof"
334+
@echo " profile-cuda - Profile CUDA examples with nsys"
334335
@echo " profile-hip - Profile HIP examples with rocprof"
335336
@echo " profile-detailed-cuda - Detailed profiling with Nsight Compute"
336337
@echo " memcheck-cuda - Run CUDA memory checker"

modules/module6/examples/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,7 @@ ifeq ($(BUILD_CUDA),1)
142142
@for target in $(CUDA_TARGETS); do \
143143
if [ -f $$target ]; then \
144144
echo "Profiling $$target..."; \
145-
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
145+
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
146146
fi; \
147147
done
148148
endif

modules/module7/examples/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,7 @@ ifeq ($(BUILD_CUDA),1)
150150
@for target in $(CUDA_TARGETS); do \
151151
if [ -f $$target ]; then \
152152
echo "Profiling $$target..."; \
153-
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
153+
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
154154
fi; \
155155
done
156156
endif

modules/module8/examples/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -207,7 +207,7 @@ ifeq ($(BUILD_CUDA),1)
207207
@for target in $(CUDA_TARGETS); do \
208208
if [ -f $$target ]; then \
209209
echo "Profiling $$target..."; \
210-
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
210+
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
211211
fi; \
212212
done
213213
endif

modules/module9/examples/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -222,7 +222,7 @@ ifeq ($(BUILD_CUDA),1)
222222
@for target in $(CUDA_TARGETS); do \
223223
if [ -f $$target ]; then \
224224
echo "Profiling $$target..."; \
225-
nvprof --csv -o $(PROFILE_DIR)/$$(basename $$target).csv $$target 2>/dev/null || echo "nvprof completed"; \
225+
nsys profile --cuda-event-trace=false -o $(PROFILE_DIR)/$$(basename $$target).nsys-rep $$target 2>/dev/null || echo "nsys completed"; \
226226
fi; \
227227
done
228228
endif

0 commit comments

Comments
 (0)