@@ -31,19 +31,6 @@ function __kernel(expr, generate_cpu = true, force_inbounds = false)
31
31
constargs[i] = false
32
32
end
33
33
34
- # create two functions
35
- # 1. GPU function
36
- # 2. CPU function with work-group loops inserted
37
- #
38
- # Without the deepcopy we might accidentally modify expr shared between CPU and GPU
39
- cpu_name = Symbol (:cpu_ , name)
40
- if generate_cpu
41
- def_cpu = deepcopy (def)
42
- def_cpu[:name ] = cpu_name
43
- transform_cpu! (def_cpu, constargs, force_inbounds)
44
- cpu_function = combinedef (def_cpu)
45
- end
46
-
47
34
def_gpu = deepcopy (def)
48
35
def_gpu[:name ] = gpu_name = Symbol (:gpu_ , name)
49
36
transform_gpu! (def_gpu, constargs, force_inbounds)
@@ -56,24 +43,12 @@ function __kernel(expr, generate_cpu = true, force_inbounds = false)
56
43
$ name (dev, size) = $ name (dev, $ StaticSize (size), $ DynamicSize ())
57
44
$ name (dev, size, range) = $ name (dev, $ StaticSize (size), $ StaticSize (range))
58
45
function $name (dev:: Dev , sz:: S , range:: NDRange ) where {Dev, S <: $_Size , NDRange <: $_Size }
59
- if $ isgpu (dev)
60
- return $ construct (dev, sz, range, $ gpu_name)
61
- else
62
- if $ generate_cpu
63
- return $ construct (dev, sz, range, $ cpu_name)
64
- else
65
- error (" This kernel is unavailable for backend CPU" )
66
- end
67
- end
46
+ return $ construct (dev, sz, range, $ gpu_name)
68
47
end
69
48
end
70
49
end
71
50
72
- if generate_cpu
73
- return Expr (:block , esc (cpu_function), esc (gpu_function), esc (constructors))
74
- else
75
- return Expr (:block , esc (gpu_function), esc (constructors))
76
- end
51
+ return Expr (:block , esc (gpu_function), esc (constructors))
77
52
end
78
53
79
54
# The easy case, transform the function for GPU execution
@@ -94,42 +69,7 @@ function transform_gpu!(def, constargs, force_inbounds)
94
69
if force_inbounds
95
70
push! (new_stmts, Expr (:inbounds , true ))
96
71
end
97
- append! (new_stmts, split (emit_gpu, body. args))
98
- if force_inbounds
99
- push! (new_stmts, Expr (:inbounds , :pop ))
100
- end
101
- push! (new_stmts, Expr (:popaliasscope ))
102
- push! (new_stmts, :(return nothing ))
103
- def[:body ] = Expr (
104
- :let ,
105
- Expr (:block , let_constargs... ),
106
- Expr (:block , new_stmts... ),
107
- )
108
- return
109
- end
110
-
111
- # The hard case, transform the function for CPU execution
112
- # - mark constant arguments by applying `constify`.
113
- # - insert aliasscope markers
114
- # - insert implied loop bodies
115
- # - handle indices
116
- # - hoist workgroup definitions
117
- # - hoist uniform variables
118
- function transform_cpu! (def, constargs, force_inbounds)
119
- let_constargs = Expr[]
120
- for (i, arg) in enumerate (def[:args ])
121
- if constargs[i]
122
- push! (let_constargs, :($ arg = $ constify ($ arg)))
123
- end
124
- end
125
- pushfirst! (def[:args ], :__ctx__ )
126
- new_stmts = Expr[]
127
- body = MacroTools. flatten (def[:body ])
128
- push! (new_stmts, Expr (:aliasscope ))
129
- if force_inbounds
130
- push! (new_stmts, Expr (:inbounds , true ))
131
- end
132
- append! (new_stmts, split (emit_cpu, body. args))
72
+ append! (new_stmts, split (body. args))
133
73
if force_inbounds
134
74
push! (new_stmts, Expr (:inbounds , :pop ))
135
75
end
169
109
170
110
# TODO proper handling of LineInfo
171
111
function split (
172
- emit,
173
112
stmts,
174
113
indicies = Any[], private = Set {Symbol} (),
175
114
)
@@ -249,62 +188,7 @@ function split(
249
188
return new_stmts
250
189
end
251
190
252
# Build the CPU-side expression for one `loop` record.
#
# `loop` is read through the fields `indicies`, `allocations`,
# `private_allocations`, `stmts` and `private`. The returned expression is
# `unblock(Expr(:block, ...))` over, in order:
#   1. the entries of `loop.allocations`,
#   2. an assignment of `length(__workitems_iterspace(__ctx__))` to a fresh symbol,
#   3. one `lhs = ntuple(_ -> rhs, N)` per entry of `loop.private_allocations`,
#   4. a `for` loop over `__workitems_iterspace(__ctx__)` wrapping `loop.stmts`
#      (omitted when `loop.stmts` holds nothing but `LineNumberNode`s).
#
# Side effect: the right-hand side of every statement in `loop.indicies` is
# mutated in place (the generated loop index is pushed onto its argument list).
function emit_cpu(loop)
    item = gensym(:I)
    # Each index statement is an assignment whose RHS is a call expression;
    # splice the loop index in as that call's trailing argument.
    for stmt in loop.indicies
        @assert stmt.head === :(=)
        push!(stmt.args[2].args, item)
    end

    out = Any[]
    append!(out, loop.allocations)

    # Private allocations become one tuple slot per entry of the work-item
    # iteration space: lhs = ntuple(_ -> rhs, N).
    nitems = gensym(:N)
    push!(out, :($nitems = length($__workitems_iterspace(__ctx__))))

    for stmt in loop.private_allocations
        if !@capture(stmt, lhs_ = rhs_)
            error("@private $stmt not an assignment")
        end
        push!(out, :($lhs = ntuple(_ -> $rhs, $nitems)))
    end

    # Only emit a loop when there is something other than line-number nodes in it.
    if any(s -> !(s isa LineNumberNode), loop.stmts)
        rewritten = postwalk(Expr(:block, loop.stmts...)) do node
            # Plain assignment: reject if the target is a private name, otherwise keep as is.
            # NOTE(review): postwalk transforms leaves before their parent, so a bare
            # private symbol on the left-hand side may already have been rewritten by
            # the Symbol case below when this check runs — confirm it is reachable.
            if @capture(node, target_ = value_)
                if target in loop.private
                    error("Can't assign to variables marked private")
                end
                return node
            end
            # Indexing into a private name: select this work-item's slot first.
            if @capture(node, arr_[subs__])
                arr in loop.private || return node
                return :($arr[$__index_Local_Linear(__ctx__, $(item))][$(subs...)])
            end
            # Bare reference to a private name: read this work-item's slot.
            if node isa Symbol && node in loop.private
                return :($node[$__index_Local_Linear(__ctx__, $(item))])
            end
            return node
        end
        workloop = quote
            for $item in $__workitems_iterspace(__ctx__)
                $__validindex(__ctx__, $item) || continue
                $(loop.indicies...)
                $(unblock(rewritten))
            end
        end
        push!(out, workloop)
    end

    return unblock(Expr(:block, out...))
end
306
-
307
- function emit_gpu (loop)
191
+ function emit (loop)
308
192
stmts = Any[]
309
193
append! (stmts, loop. allocations)
310
194
for stmt in loop. private_allocations
0 commit comments