Skip to content

Commit a067434

Browse files
authored
multiple gpus on one PE using pytorch and hapi callbacks (#303)

* multiple gpus on one PE using pytorch and hapi callbacks
* print future ids, change if statement
1 parent 293b2e8 commit a067434

File tree

1 file changed

+54
-0
lines changed

1 file changed

+54
-0
lines changed
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
'''
2+
Use one process to launch two torch matmul kernels, each on a separate device
3+
A HAPI callback is registered for each kernel
4+
which triggers two different methods
5+
Must run this program with 2 different gpus
6+
'''
7+
8+
from charm4py import charm
9+
import torch
10+
11+
def main(args):
    """Launch two matmul kernels, one per GPU, each on its own CUDA stream.

    A HAPI callback (a charm4py Future) is registered on each stream; the
    coroutine then waits on both futures and reports each completion.
    Requires at least 2 CUDA devices; exits with an error message otherwise.
    """
    N = 10000  # matrix dimension: each operand is N x N

    if not torch.cuda.is_available():
        print("Error: No GPU detected")
        charm.exit()
    if torch.cuda.device_count() < 2:
        print("Error: fewer than 2 GPUs, only " + str(torch.cuda.device_count()) + " gpus found")
        charm.exit()

    cuda0 = torch.device('cuda:0')  # first device
    cuda1 = torch.device('cuda:1')  # second device

    stream0 = torch.cuda.Stream(device=cuda0)
    stream1 = torch.cuda.Stream(device=cuda1)

    # Allocate tensors and launch the kernel on device 0.
    # FIX: the original ran the matmuls on each device's *default* stream
    # (`with cuda0:` only sets the default device), while the HAPI callbacks
    # below are registered on stream0/stream1 — so the futures could fire
    # without waiting for the kernels. Enqueue the work on the same streams
    # the callbacks watch.
    with cuda0, torch.cuda.stream(stream0):
        a0 = torch.randn(N, N)
        b0 = torch.randn(N, N)
        c0 = torch.mm(a0, b0)

    # Allocate tensors and launch the kernel on device 1
    with cuda1, torch.cuda.stream(stream1):
        a1 = torch.randn(N, N)
        b1 = torch.randn(N, N)
        c1 = torch.mm(a1, b1)

    # create callbacks (should we implement callbacks to entry methods?)
    # Each future completes when all work enqueued on its stream finishes.
    future0 = charm.Future()
    future1 = charm.Future()
    print("Future 0 id: ", future0.fid)
    print("Future 1 id: ", future1.fid)
    futures = [future0, future1]
    charm.hapiAddCudaCallback(stream0.cuda_stream, future0)
    charm.hapiAddCudaCallback(stream1.cuda_stream, future1)

    # iwait yields futures in the order they complete, not list order
    for fut_object in charm.iwait(futures):
        print('One device kernel complete, id: ', fut_object.fid)

    charm.exit()
53+
54+
# Entry point: start the charm4py runtime and invoke main() once it is up
charm.start(main)

0 commit comments

Comments (0)