187187
188188function finish_ir! (@nospecialize (job:: CompilerJob{PTXCompilerTarget} ),
189189 mod:: LLVM.Module , entry:: LLVM.Function )
190- lower_trap! (mod)
191190 for f in functions (mod)
192191 lower_unreachable! (f)
193192 end
246245
247246# # LLVM passes
248247
249- # replace calls to `trap` with inline assembly calling `exit`, which isn't fatal
250- function lower_trap! (mod:: LLVM.Module )
251- job = current_job:: CompilerJob
252- changed = false
253- @timeit_debug to " lower trap" begin
254-
255- if haskey (functions (mod), " llvm.trap" )
256- trap = functions (mod)[" llvm.trap" ]
257-
258- # inline assembly to exit a thread
259- exit_ft = LLVM. FunctionType (LLVM. VoidType ())
260- exit = InlineAsm (exit_ft, " exit;" , " " , true )
261-
262- for use in uses (trap)
263- val = user (use)
264- if isa (val, LLVM. CallInst)
265- @dispose builder= IRBuilder () begin
266- position! (builder, val)
267- call! (builder, exit_ft, exit)
268- end
269- unsafe_delete! (LLVM. parent (val), val)
270- changed = true
271- end
272- end
273- end
274-
275- end
276- return changed
277- end
278-
279248# lower `unreachable` to `exit` so that the emitted PTX has correct control flow
280249#
281250# During back-end compilation, `ptxas` inserts instructions to manage the hardware's
@@ -328,10 +297,14 @@ end
328297# `bar.sync` cannot be executed divergently on Pascal hardware or earlier.
329298#
330299# To avoid these fall-through successors that change the control flow,
331- # we replace `unreachable` instructions with a call to `exit`. This informs
332- # `ptxas` that the thread exits, and allows it to correctly construct a CFG,
333- # and consequently correctly determine the divergence regions as intended.
300+ # we replace `unreachable` instructions with a call to `trap` and `exit`. This
301+ # informs `ptxas` that the thread exits, and allows it to correctly construct a
302+ # CFG, and consequently correctly determine the divergence regions as intended.
303+ # Note that we first emit a call to `trap`, so that the behaviour is the same
304+ # as before.
334305function lower_unreachable! (f:: LLVM.Function )
306+ mod = LLVM. parent (f)
307+
335308 # TODO :
336309 # - if unreachable blocks have been merged, we still may be jumping from different
337310 # divergent regions, potentially causing the same problem as above:
@@ -375,6 +348,12 @@ function lower_unreachable!(f::LLVM.Function)
375348 # inline assembly to exit a thread
376349 exit_ft = LLVM. FunctionType (LLVM. VoidType ())
377350 exit = InlineAsm (exit_ft, " exit;" , " " , true )
351+ trap_ft = LLVM. FunctionType (LLVM. VoidType ())
352+ trap = if haskey (functions (mod), " llvm.trap" )
353+ functions (mod)[" llvm.trap" ]
354+ else
355+ LLVM. Function (mod, " llvm.trap" , trap_ft)
356+ end
378357
379358 # rewrite the unreachable terminators
380359 @dispose builder= IRBuilder () begin
@@ -384,6 +363,7 @@ function lower_unreachable!(f::LLVM.Function)
384363 @assert inst isa LLVM. UnreachableInst
385364
386365 position! (builder, inst)
366+ call! (builder, trap_ft, trap)
387367 call! (builder, exit_ft, exit)
388368 end
389369 end
0 commit comments