Skip to content

Commit 429ef74

Browse files
committed
yank CPU backend
1 parent a89c07c commit 429ef74

File tree

2 files changed

+11
-155
lines changed

2 files changed

+11
-155
lines changed

src/KernelAbstractions.jl

Lines changed: 7 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -492,35 +492,6 @@ Abstract type for all GPU based KernelAbstractions backends.
492492
"""
493493
abstract type GPU <: Backend end
494494

495-
"""
496-
CPU(; static=false)
497-
498-
Instantiate a CPU (multi-threaded) backend.
499-
500-
## Options:
501-
- `static`: Uses a static thread assignment; this can be beneficial for NUMA-aware code.
502-
Defaults to false.
503-
504-
!!! note
505-
`CPU` will be aliased to `POCLBackend()` on KernelAbstractions v1.0
506-
"""
507-
struct CPU <: Backend
508-
static::Bool
509-
CPU(; static::Bool = false) = new(static)
510-
end
511-
512-
"""
513-
isgpu(::Backend)::Bool
514-
515-
Returns true for all [`GPU`](@ref) backends.
516-
517-
!!! note
518-
`isgpu` will be removed in KernelAbstractions v1.0
519-
"""
520-
isgpu(::GPU) = true
521-
isgpu(::CPU) = false
522-
523-
524495
"""
525496
get_backend(A::AbstractArray)::Backend
526497
@@ -538,7 +509,6 @@ get_backend(A::AbstractArray) = get_backend(parent(A))
538509
# Define:
539510
# adapt_storage(::Backend, a::Array) = adapt(BackendArray, a)
540511
# adapt_storage(::Backend, a::BackendArray) = a
541-
Adapt.adapt_storage(::CPU, a::Array) = a
542512

543513
"""
544514
allocate(::Backend, Type, dims...)::AbstractArray
@@ -758,7 +728,7 @@ Partition a kernel for the given ndrange and workgroupsize.
758728
return iterspace, dynamic
759729
end
760730

761-
function construct(backend::Backend, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: Union{CPU, GPU}, S <: _Size, NDRange <: _Size, XPUName}
731+
function construct(backend::Backend, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: GPU, S <: _Size, NDRange <: _Size, XPUName}
762732
return Kernel{Backend, S, NDRange, XPUName}(backend, xpu_name)
763733
end
764734

@@ -775,6 +745,10 @@ include("compiler.jl")
775745
function __workitems_iterspace end
776746
function __validindex end
777747

748+
# for reflection
749+
function mkcontext end
750+
function launch_config end
751+
778752
include("macros.jl")
779753

780754
###
@@ -844,14 +818,12 @@ end
844818
end
845819

846820
# CPU backend
847-
848-
include("cpu.jl")
849-
850-
# Future-CPU backend
851821
include("pocl/pocl.jl")
852822
using .POCL
853823
export POCLBackend
854824

825+
const CPU = POCLBackend
826+
855827
# precompile
856828
PrecompileTools.@compile_workload begin
857829
@eval begin

src/macros.jl

Lines changed: 4 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -31,19 +31,6 @@ function __kernel(expr, generate_cpu = true, force_inbounds = false)
3131
constargs[i] = false
3232
end
3333

34-
# create two functions
35-
# 1. GPU function
36-
# 2. CPU function with work-group loops inserted
37-
#
38-
# Without the deepcopy we might accidentally modify expr shared between CPU and GPU
39-
cpu_name = Symbol(:cpu_, name)
40-
if generate_cpu
41-
def_cpu = deepcopy(def)
42-
def_cpu[:name] = cpu_name
43-
transform_cpu!(def_cpu, constargs, force_inbounds)
44-
cpu_function = combinedef(def_cpu)
45-
end
46-
4734
def_gpu = deepcopy(def)
4835
def_gpu[:name] = gpu_name = Symbol(:gpu_, name)
4936
transform_gpu!(def_gpu, constargs, force_inbounds)
@@ -56,24 +43,12 @@ function __kernel(expr, generate_cpu = true, force_inbounds = false)
5643
$name(dev, size) = $name(dev, $StaticSize(size), $DynamicSize())
5744
$name(dev, size, range) = $name(dev, $StaticSize(size), $StaticSize(range))
5845
function $name(dev::Dev, sz::S, range::NDRange) where {Dev, S <: $_Size, NDRange <: $_Size}
59-
if $isgpu(dev)
60-
return $construct(dev, sz, range, $gpu_name)
61-
else
62-
if $generate_cpu
63-
return $construct(dev, sz, range, $cpu_name)
64-
else
65-
error("This kernel is unavailable for backend CPU")
66-
end
67-
end
46+
return $construct(dev, sz, range, $gpu_name)
6847
end
6948
end
7049
end
7150

72-
if generate_cpu
73-
return Expr(:block, esc(cpu_function), esc(gpu_function), esc(constructors))
74-
else
75-
return Expr(:block, esc(gpu_function), esc(constructors))
76-
end
51+
return Expr(:block, esc(gpu_function), esc(constructors))
7752
end
7853

7954
# The easy case, transform the function for GPU execution
@@ -94,42 +69,7 @@ function transform_gpu!(def, constargs, force_inbounds)
9469
if force_inbounds
9570
push!(new_stmts, Expr(:inbounds, true))
9671
end
97-
append!(new_stmts, split(emit_gpu, body.args))
98-
if force_inbounds
99-
push!(new_stmts, Expr(:inbounds, :pop))
100-
end
101-
push!(new_stmts, Expr(:popaliasscope))
102-
push!(new_stmts, :(return nothing))
103-
def[:body] = Expr(
104-
:let,
105-
Expr(:block, let_constargs...),
106-
Expr(:block, new_stmts...),
107-
)
108-
return
109-
end
110-
111-
# The hard case, transform the function for CPU execution
112-
# - mark constant arguments by applying `constify`.
113-
# - insert aliasscope markers
114-
# - insert implied loop bodies
115-
# - handle indices
116-
# - hoist workgroup definitions
117-
# - hoist uniform variables
118-
function transform_cpu!(def, constargs, force_inbounds)
119-
let_constargs = Expr[]
120-
for (i, arg) in enumerate(def[:args])
121-
if constargs[i]
122-
push!(let_constargs, :($arg = $constify($arg)))
123-
end
124-
end
125-
pushfirst!(def[:args], :__ctx__)
126-
new_stmts = Expr[]
127-
body = MacroTools.flatten(def[:body])
128-
push!(new_stmts, Expr(:aliasscope))
129-
if force_inbounds
130-
push!(new_stmts, Expr(:inbounds, true))
131-
end
132-
append!(new_stmts, split(emit_cpu, body.args))
72+
append!(new_stmts, split(body.args))
13373
if force_inbounds
13474
push!(new_stmts, Expr(:inbounds, :pop))
13575
end
@@ -169,7 +109,6 @@ end
169109

170110
# TODO proper handling of LineInfo
171111
function split(
172-
emit,
173112
stmts,
174113
indicies = Any[], private = Set{Symbol}(),
175114
)
@@ -249,62 +188,7 @@ function split(
249188
return new_stmts
250189
end
251190

252-
function emit_cpu(loop)
253-
idx = gensym(:I)
254-
for stmt in loop.indicies
255-
# splice index into the i = @index(Cartesian, $idx)
256-
@assert stmt.head === :(=)
257-
rhs = stmt.args[2]
258-
push!(rhs.args, idx)
259-
end
260-
stmts = Any[]
261-
append!(stmts, loop.allocations)
262-
263-
# private_allocations turn into lhs = ntuple(i->rhs, length(__workitems_iterspace()))
264-
N = gensym(:N)
265-
push!(stmts, :($N = length($__workitems_iterspace(__ctx__))))
266-
267-
for stmt in loop.private_allocations
268-
if @capture(stmt, lhs_ = rhs_)
269-
push!(stmts, :($lhs = ntuple(_ -> $rhs, $N)))
270-
else
271-
error("@private $stmt not an assignment")
272-
end
273-
end
274-
275-
# don't emit empty loops
276-
if !(isempty(loop.stmts) || all(s -> s isa LineNumberNode, loop.stmts))
277-
body = Expr(:block, loop.stmts...)
278-
body = postwalk(body) do expr
279-
if @capture(expr, lhs_ = rhs_)
280-
if lhs in loop.private
281-
error("Can't assign to variables marked private")
282-
end
283-
elseif @capture(expr, A_[i__])
284-
if A in loop.private
285-
return :($A[$__index_Local_Linear(__ctx__, $(idx))][$(i...)])
286-
end
287-
elseif expr isa Symbol
288-
if expr in loop.private
289-
return :($expr[$__index_Local_Linear(__ctx__, $(idx))])
290-
end
291-
end
292-
return expr
293-
end
294-
loopexpr = quote
295-
for $idx in $__workitems_iterspace(__ctx__)
296-
$__validindex(__ctx__, $idx) || continue
297-
$(loop.indicies...)
298-
$(unblock(body))
299-
end
300-
end
301-
push!(stmts, loopexpr)
302-
end
303-
304-
return unblock(Expr(:block, stmts...))
305-
end
306-
307-
function emit_gpu(loop)
191+
function emit(loop)
308192
stmts = Any[]
309193
append!(stmts, loop.allocations)
310194
for stmt in loop.private_allocations

0 commit comments

Comments
 (0)