Skip to content

Commit f3d6be0

Browse files
authored
Merge pull request #9 from SymbolicML/loop-vectorization
Optionally use `LoopVectorization.@turbo` in dynamic expression evaluation scheme
2 parents cc313ad + cc41e61 commit f3d6be0

File tree

7 files changed

+223
-139
lines changed

7 files changed

+223
-139
lines changed

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ version = "0.3.0"
55

66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
8+
LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890"
89
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
910
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
1011
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"

src/EvaluateEquation.jl

Lines changed: 80 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
module EvaluateEquationModule
22

3+
import LoopVectorization: @turbo, indices
34
import ..EquationModule: Node, string_tree
45
import ..OperatorEnumModule: OperatorEnum, GenericOperatorEnum
5-
import ..UtilsModule: @return_on_false, is_bad_array, vals
6+
import ..UtilsModule: @return_on_false, @maybe_turbo, is_bad_array, vals
67
import ..EquationUtilsModule: is_constant
78

89
macro return_on_check(val, T, n)
@@ -27,7 +28,7 @@ macro return_on_nonfinite_array(array, T, n)
2728
end
2829

2930
"""
30-
eval_tree_array(tree::Node, cX::AbstractMatrix{T}, operators::OperatorEnum)
31+
eval_tree_array(tree::Node, cX::AbstractMatrix{T}, operators::OperatorEnum; turbo::Bool=false)
3132
3233
Evaluate a binary tree (equation) over a given input data matrix. The
3334
operators contain all of the operators used. This function fuses doublets
@@ -51,6 +52,7 @@ which speed up evaluation significantly.
5152
- `tree::Node`: The root node of the tree to evaluate.
5253
- `cX::AbstractMatrix{T}`: The input data to evaluate the tree on.
5354
- `operators::OperatorEnum`: The operators used in the tree.
55+
- `turbo::Bool`: Use `LoopVectorization.@turbo` for faster evaluation (default: `false`). When `true`, `T` must be `Float32` or `Float64`.
5456
5557
# Returns
5658
- `(output, complete)::Tuple{AbstractVector{T}, Bool}`: the result,
@@ -60,31 +62,36 @@ which speed up evaluation significantly.
6062
to the equation.
6163
"""
6264
function eval_tree_array(
    tree::Node{T}, cX::AbstractMatrix{T}, operators::OperatorEnum; turbo::Bool=false
)::Tuple{AbstractVector{T},Bool} where {T<:Real}
    # Number of columns of `cX`; passed to the non-finite check macro below.
    n = size(cX, 2)

    # The turbo path is restricted to Float32/Float64 element types. Check this
    # explicitly (instead of with a bare `@assert`) so the check always runs and
    # the failure explains itself. The exception type is still `AssertionError`,
    # so existing callers see the same kind of failure as before.
    if turbo && !(T in (Float32, Float64))
        throw(
            AssertionError(
                "eval_tree_array: turbo=true requires T to be Float32 or Float64, got $(T)"
            ),
        )
    end

    # Lift the runtime `turbo` flag into the type domain so the recursive
    # evaluator can dispatch on it.
    result, finished = _eval_tree_array(
        tree, cX, operators, (turbo ? Val(true) : Val(false))
    )

    # NOTE(review): both macros are defined elsewhere; judging by their names
    # they return early when `finished` is false or `result` holds non-finite
    # values - confirm against their definitions.
    @return_on_false finished result
    @return_on_nonfinite_array result T n
    return result, finished
end
7178
# Mixed-type entry point: the tree's element type (`T1`) and the data's element
# type (`T2`) differ. Emit a warning, convert both the tree and the data to
# their promoted common type, and re-enter `eval_tree_array` with the converted
# arguments, forwarding the `turbo` keyword unchanged.
function eval_tree_array(
    tree::Node{T1}, cX::AbstractMatrix{T2}, operators::OperatorEnum; turbo::Bool=false
) where {T1<:Real,T2<:Real}
    @warn "Warning: eval_tree_array received mixed types: tree=$(T1) and data=$(T2)."
    common = promote_type(T1, T2)
    promoted_tree = convert(Node{common}, tree)
    promoted_data = convert(AbstractMatrix{common}, cX)
    return eval_tree_array(promoted_tree, promoted_data, operators; turbo=turbo)
end
8087

8188
function _eval_tree_array(
82-
tree::Node{T}, cX::AbstractMatrix{T}, operators::OperatorEnum
83-
)::Tuple{AbstractVector{T},Bool} where {T<:Real}
89+
tree::Node{T}, cX::AbstractMatrix{T}, operators::OperatorEnum, ::Val{turbo}
90+
)::Tuple{AbstractVector{T},Bool} where {T<:Real,turbo}
8491
# First, we see if there are only constants in the tree - meaning
8592
# we can just return the constant result.
8693
if tree.degree == 0
87-
return deg0_eval(tree, cX, operators)
94+
return deg0_eval(tree, cX)
8895
elseif is_constant(tree)
8996
# Speed hack for constant trees.
9097
result, flag = _eval_constant_tree(tree, operators)
@@ -93,46 +100,54 @@ function _eval_tree_array(
93100
elseif tree.degree == 1
94101
if tree.l.degree == 2 && tree.l.l.degree == 0 && tree.l.r.degree == 0
95102
# op(op2(x, y)), where x, y are constants or variables.
96-
return deg1_l2_ll0_lr0_eval(tree, cX, vals[tree.op], vals[tree.l.op], operators)
103+
return deg1_l2_ll0_lr0_eval(
104+
tree, cX, vals[tree.op], vals[tree.l.op], operators, Val(turbo)
105+
)
97106
elseif tree.l.degree == 1 && tree.l.l.degree == 0
98107
# op(op2(x)), where x is a constant or variable.
99-
return deg1_l1_ll0_eval(tree, cX, vals[tree.op], vals[tree.l.op], operators)
108+
return deg1_l1_ll0_eval(
109+
tree, cX, vals[tree.op], vals[tree.l.op], operators, Val(turbo)
110+
)
100111
else
101112
# op(x), for any x.
102-
return deg1_eval(tree, cX, vals[tree.op], operators)
113+
return deg1_eval(tree, cX, vals[tree.op], operators, Val(turbo))
103114
end
104115
elseif tree.degree == 2
105116
# TODO - add op(op2(x, y), z) and op(x, op2(y, z))
106117
if tree.l.degree == 0 && tree.r.degree == 0
107118
# op(x, y), where x, y are constants or variables.
108-
return deg2_l0_r0_eval(tree, cX, vals[tree.op], operators)
119+
return deg2_l0_r0_eval(tree, cX, vals[tree.op], operators, Val(turbo))
109120
elseif tree.l.degree == 0
110121
# op(x, y), where x is a constant or variable but y is not.
111-
return deg2_l0_eval(tree, cX, vals[tree.op], operators)
122+
return deg2_l0_eval(tree, cX, vals[tree.op], operators, Val(turbo))
112123
elseif tree.r.degree == 0
113124
# op(x, y), where y is a constant or variable but x is not.
114-
return deg2_r0_eval(tree, cX, vals[tree.op], operators)
125+
return deg2_r0_eval(tree, cX, vals[tree.op], operators, Val(turbo))
115126
else
116127
# op(x, y), for any x or y
117-
return deg2_eval(tree, cX, vals[tree.op], operators)
128+
return deg2_eval(tree, cX, vals[tree.op], operators, Val(turbo))
118129
end
119130
end
120131
end
121132

122133
function deg2_eval(
123-
tree::Node{T}, cX::AbstractMatrix{T}, ::Val{op_idx}, operators::OperatorEnum
124-
)::Tuple{AbstractVector{T},Bool} where {T<:Real,op_idx}
134+
tree::Node{T},
135+
cX::AbstractMatrix{T},
136+
::Val{op_idx},
137+
operators::OperatorEnum,
138+
::Val{turbo},
139+
)::Tuple{AbstractVector{T},Bool} where {T<:Real,op_idx,turbo}
125140
n = size(cX, 2)
126-
(cumulator, complete) = _eval_tree_array(tree.l, cX, operators)
141+
(cumulator, complete) = _eval_tree_array(tree.l, cX, operators, Val(turbo))
127142
@return_on_false complete cumulator
128143
@return_on_nonfinite_array cumulator T n
129-
(array2, complete2) = _eval_tree_array(tree.r, cX, operators)
144+
(array2, complete2) = _eval_tree_array(tree.r, cX, operators, Val(turbo))
130145
@return_on_false complete2 cumulator
131146
@return_on_nonfinite_array array2 T n
132147
op = operators.binops[op_idx]
133148

134149
# We check inputs (and intermediates), not outputs.
135-
@inbounds @simd for j in 1:n
150+
@maybe_turbo turbo for j in indices(cumulator)
136151
x = op(cumulator[j], array2[j])::T
137152
cumulator[j] = x
138153
end
@@ -141,22 +156,26 @@ function deg2_eval(
141156
end
142157

143158
# Evaluate a degree-1 node: recursively evaluate the left child, then apply the
# unary operator `operators.unaops[op_idx]` to every element of the child's
# result in place. `Val{turbo}` is forwarded to the recursive call and selects
# the loop flavour via `@maybe_turbo`.
#
# Returns `(values, true)` when the loop runs to completion.
function deg1_eval(
    tree::Node{T},
    cX::AbstractMatrix{T},
    ::Val{op_idx},
    operators::OperatorEnum,
    ::Val{turbo},
)::Tuple{AbstractVector{T},Bool} where {T<:Real,op_idx,turbo}
    n = size(cX, 2)
    (buffer, child_ok) = _eval_tree_array(tree.l, cX, operators, Val(turbo))
    # NOTE(review): these macros are defined elsewhere; by name they return
    # early on an incomplete or non-finite child result - confirm.
    @return_on_false child_ok buffer
    @return_on_nonfinite_array buffer T n
    unary_op = operators.unaops[op_idx]
    # The child's buffer is reused as the output, so no new array is allocated here.
    @maybe_turbo turbo for k in indices(buffer)
        y = unary_op(buffer[k])::T
        buffer[k] = y
    end
    return (buffer, true)
end
157176

158177
function deg0_eval(
159-
tree::Node{T}, cX::AbstractMatrix{T}, operators::OperatorEnum
178+
tree::Node{T}, cX::AbstractMatrix{T}
160179
)::Tuple{AbstractVector{T},Bool} where {T<:Real}
161180
n = size(cX, 2)
162181
if tree.constant
@@ -172,7 +191,8 @@ function deg1_l2_ll0_lr0_eval(
172191
::Val{op_idx},
173192
::Val{op_l_idx},
174193
operators::OperatorEnum,
175-
)::Tuple{AbstractVector{T},Bool} where {T<:Real,op_idx,op_l_idx}
194+
::Val{turbo},
195+
)::Tuple{AbstractVector{T},Bool} where {T<:Real,op_idx,op_l_idx,turbo}
176196
n = size(cX, 2)
177197
op = operators.unaops[op_idx]
178198
op_l = operators.binops[op_l_idx]
@@ -191,7 +211,7 @@ function deg1_l2_ll0_lr0_eval(
191211
@return_on_check val_ll T n
192212
feature_lr = tree.l.r.feature
193213
cumulator = Array{T,1}(undef, n)
194-
@inbounds @simd for j in 1:n
214+
@maybe_turbo turbo for j in indices((cX, cumulator), (2, 1))
195215
x_l = op_l(val_ll, cX[feature_lr, j])::T
196216
x = isfinite(x_l) ? op(x_l)::T : T(Inf) # These will get discovered by _eval_tree_array at end.
197217
cumulator[j] = x
@@ -202,7 +222,7 @@ function deg1_l2_ll0_lr0_eval(
202222
val_lr = tree.l.r.val::T
203223
@return_on_check val_lr T n
204224
cumulator = Array{T,1}(undef, n)
205-
@inbounds @simd for j in 1:n
225+
@maybe_turbo turbo for j in indices((cX, cumulator), (2, 1))
206226
x_l = op_l(cX[feature_ll, j], val_lr)::T
207227
x = isfinite(x_l) ? op(x_l)::T : T(Inf)
208228
cumulator[j] = x
@@ -212,7 +232,7 @@ function deg1_l2_ll0_lr0_eval(
212232
feature_ll = tree.l.l.feature
213233
feature_lr = tree.l.r.feature
214234
cumulator = Array{T,1}(undef, n)
215-
@inbounds @simd for j in 1:n
235+
@maybe_turbo turbo for j in indices((cX, cumulator), (2, 1))
216236
x_l = op_l(cX[feature_ll, j], cX[feature_lr, j])::T
217237
x = isfinite(x_l) ? op(x_l)::T : T(Inf)
218238
cumulator[j] = x
@@ -228,7 +248,8 @@ function deg1_l1_ll0_eval(
228248
::Val{op_idx},
229249
::Val{op_l_idx},
230250
operators::OperatorEnum,
231-
)::Tuple{AbstractVector{T},Bool} where {T<:Real,op_idx,op_l_idx}
251+
::Val{turbo},
252+
)::Tuple{AbstractVector{T},Bool} where {T<:Real,op_idx,op_l_idx,turbo}
232253
n = size(cX, 2)
233254
op = operators.unaops[op_idx]
234255
op_l = operators.unaops[op_l_idx]
@@ -243,7 +264,7 @@ function deg1_l1_ll0_eval(
243264
else
244265
feature_ll = tree.l.l.feature
245266
cumulator = Array{T,1}(undef, n)
246-
@inbounds @simd for j in 1:n
267+
@maybe_turbo turbo for j in indices((cX, cumulator), (2, 1))
247268
x_l = op_l(cX[feature_ll, j])::T
248269
x = isfinite(x_l) ? op(x_l)::T : T(Inf)
249270
cumulator[j] = x
@@ -253,8 +274,12 @@ function deg1_l1_ll0_eval(
253274
end
254275

255276
function deg2_l0_r0_eval(
256-
tree::Node{T}, cX::AbstractMatrix{T}, ::Val{op_idx}, operators::OperatorEnum
257-
)::Tuple{AbstractVector{T},Bool} where {T<:Real,op_idx}
277+
tree::Node{T},
278+
cX::AbstractMatrix{T},
279+
::Val{op_idx},
280+
operators::OperatorEnum,
281+
::Val{turbo},
282+
)::Tuple{AbstractVector{T},Bool} where {T<:Real,op_idx,turbo}
258283
n = size(cX, 2)
259284
op = operators.binops[op_idx]
260285
if tree.l.constant && tree.r.constant
@@ -270,7 +295,7 @@ function deg2_l0_r0_eval(
270295
val_l = tree.l.val::T
271296
@return_on_check val_l T n
272297
feature_r = tree.r.feature
273-
@inbounds @simd for j in 1:n
298+
@maybe_turbo turbo for j in indices((cX, cumulator), (2, 1))
274299
x = op(val_l, cX[feature_r, j])::T
275300
cumulator[j] = x
276301
end
@@ -279,15 +304,15 @@ function deg2_l0_r0_eval(
279304
feature_l = tree.l.feature
280305
val_r = tree.r.val::T
281306
@return_on_check val_r T n
282-
@inbounds @simd for j in 1:n
307+
@maybe_turbo turbo for j in indices((cX, cumulator), (2, 1))
283308
x = op(cX[feature_l, j], val_r)::T
284309
cumulator[j] = x
285310
end
286311
else
287312
cumulator = Array{T,1}(undef, n)
288313
feature_l = tree.l.feature
289314
feature_r = tree.r.feature
290-
@inbounds @simd for j in 1:n
315+
@maybe_turbo turbo for j in indices((cX, cumulator), (2, 1))
291316
x = op(cX[feature_l, j], cX[feature_r, j])::T
292317
cumulator[j] = x
293318
end
@@ -296,23 +321,27 @@ function deg2_l0_r0_eval(
296321
end
297322

298323
function deg2_l0_eval(
299-
tree::Node{T}, cX::AbstractMatrix{T}, ::Val{op_idx}, operators::OperatorEnum
300-
)::Tuple{AbstractVector{T},Bool} where {T<:Real,op_idx}
324+
tree::Node{T},
325+
cX::AbstractMatrix{T},
326+
::Val{op_idx},
327+
operators::OperatorEnum,
328+
::Val{turbo},
329+
)::Tuple{AbstractVector{T},Bool} where {T<:Real,op_idx,turbo}
301330
n = size(cX, 2)
302-
(cumulator, complete) = _eval_tree_array(tree.r, cX, operators)
331+
(cumulator, complete) = _eval_tree_array(tree.r, cX, operators, Val(turbo))
303332
@return_on_false complete cumulator
304333
@return_on_nonfinite_array cumulator T n
305334
op = operators.binops[op_idx]
306335
if tree.l.constant
307336
val = tree.l.val::T
308337
@return_on_check val T n
309-
@inbounds @simd for j in 1:n
338+
@maybe_turbo turbo for j in indices(cumulator)
310339
x = op(val, cumulator[j])::T
311340
cumulator[j] = x
312341
end
313342
else
314343
feature = tree.l.feature
315-
@inbounds @simd for j in 1:n
344+
@maybe_turbo turbo for j in indices((cX, cumulator), (2, 1))
316345
x = op(cX[feature, j], cumulator[j])::T
317346
cumulator[j] = x
318347
end
@@ -321,23 +350,27 @@ function deg2_l0_eval(
321350
end
322351

323352
function deg2_r0_eval(
324-
tree::Node{T}, cX::AbstractMatrix{T}, ::Val{op_idx}, operators::OperatorEnum
325-
)::Tuple{AbstractVector{T},Bool} where {T<:Real,op_idx}
353+
tree::Node{T},
354+
cX::AbstractMatrix{T},
355+
::Val{op_idx},
356+
operators::OperatorEnum,
357+
::Val{turbo},
358+
)::Tuple{AbstractVector{T},Bool} where {T<:Real,op_idx,turbo}
326359
n = size(cX, 2)
327-
(cumulator, complete) = _eval_tree_array(tree.l, cX, operators)
360+
(cumulator, complete) = _eval_tree_array(tree.l, cX, operators, Val(turbo))
328361
@return_on_false complete cumulator
329362
@return_on_nonfinite_array cumulator T n
330363
op = operators.binops[op_idx]
331364
if tree.r.constant
332365
val = tree.r.val::T
333366
@return_on_check val T n
334-
@inbounds @simd for j in 1:n
367+
@maybe_turbo turbo for j in indices(cumulator)
335368
x = op(cumulator[j], val)::T
336369
cumulator[j] = x
337370
end
338371
else
339372
feature = tree.r.feature
340-
@inbounds @simd for j in 1:n
373+
@maybe_turbo turbo for j in indices((cX, cumulator), (2, 1))
341374
x = op(cumulator[j], cX[feature, j])::T
342375
cumulator[j] = x
343376
end

0 commit comments

Comments
 (0)