1
1
module EvaluateEquationModule
2
2
3
+ import LoopVectorization: @turbo , indices
3
4
import .. EquationModule: Node, string_tree
4
5
import .. OperatorEnumModule: OperatorEnum, GenericOperatorEnum
5
- import .. UtilsModule: @return_on_false , is_bad_array, vals
6
+ import .. UtilsModule: @return_on_false , @maybe_turbo , is_bad_array, vals
6
7
import .. EquationUtilsModule: is_constant
7
8
8
9
macro return_on_check (val, T, n)
@@ -27,7 +28,7 @@ macro return_on_nonfinite_array(array, T, n)
27
28
end
28
29
29
30
"""
30
- eval_tree_array(tree::Node, cX::AbstractMatrix{T}, operators::OperatorEnum)
31
+ eval_tree_array(tree::Node, cX::AbstractMatrix{T}, operators::OperatorEnum; turbo::Bool )
31
32
32
33
Evaluate a binary tree (equation) over a given input data matrix. The
33
34
operators contain all of the operators used. This function fuses doublets
@@ -51,6 +52,7 @@ which speed up evaluation significantly.
51
52
- `tree::Node`: The root node of the tree to evaluate.
52
53
- `cX::AbstractMatrix{T}`: The input data to evaluate the tree on.
53
54
- `operators::OperatorEnum`: The operators used in the tree.
55
+ - `turbo::Bool`: Use `LoopVectorization.@turbo` for faster evaluation.
54
56
55
57
# Returns
56
58
- `(output, complete)::Tuple{AbstractVector{T}, Bool}`: the result,
@@ -60,31 +62,36 @@ which speed up evaluation significantly.
60
62
to the equation.
61
63
"""
62
64
function eval_tree_array(
    tree::Node{T}, cX::AbstractMatrix{T}, operators::OperatorEnum; turbo::Bool=false
)::Tuple{AbstractVector{T},Bool} where {T<:Real}
    n = size(cX, 2)
    # `@turbo` (LoopVectorization) only supports hardware SIMD element types;
    # validate eagerly with a real exception instead of `@assert`, which may be
    # compiled out at higher optimization levels and raises the wrong error type.
    if turbo && !(T in (Float32, Float64))
        throw(ArgumentError("`turbo=true` requires eltype Float32 or Float64, got $(T)."))
    end
    # Lift `turbo` into the type domain so the kernels can dispatch on
    # `Val{true}`/`Val{false}` and select `@turbo` vs `@inbounds @simd` loops.
    result, finished = _eval_tree_array(
        tree, cX, operators, (turbo ? Val(true) : Val(false))
    )
    # Propagate early exit if evaluation encountered an invalid operation.
    @return_on_false finished result
    # Reject NaN/Inf-contaminated outputs (returns (result, false) on failure).
    @return_on_nonfinite_array result T n
    return result, finished
end
71
78
function eval_tree_array(
    tree::Node{T1}, cX::AbstractMatrix{T2}, operators::OperatorEnum; turbo::Bool=false
) where {T1<:Real,T2<:Real}
    # Mixed-eltype fallback: promote tree and data to a common element type,
    # warn the caller about the (likely accidental) mismatch, then delegate to
    # the homogeneous method above.
    T = promote_type(T1, T2)
    @warn "Warning: eval_tree_array received mixed types: tree=$(T1) and data=$(T2)."
    promoted_tree = convert(Node{T}, tree)
    promoted_cX = convert(AbstractMatrix{T}, cX)
    return eval_tree_array(promoted_tree, promoted_cX, operators; turbo=turbo)
end
80
87
81
88
function _eval_tree_array (
82
- tree:: Node{T} , cX:: AbstractMatrix{T} , operators:: OperatorEnum
83
- ):: Tuple{AbstractVector{T},Bool} where {T<: Real }
89
+ tree:: Node{T} , cX:: AbstractMatrix{T} , operators:: OperatorEnum , :: Val{turbo}
90
+ ):: Tuple{AbstractVector{T},Bool} where {T<: Real ,turbo }
84
91
# First, we see if there are only constants in the tree - meaning
85
92
# we can just return the constant result.
86
93
if tree. degree == 0
87
- return deg0_eval (tree, cX, operators )
94
+ return deg0_eval (tree, cX)
88
95
elseif is_constant (tree)
89
96
# Speed hack for constant trees.
90
97
result, flag = _eval_constant_tree (tree, operators)
@@ -93,46 +100,54 @@ function _eval_tree_array(
93
100
elseif tree. degree == 1
94
101
if tree. l. degree == 2 && tree. l. l. degree == 0 && tree. l. r. degree == 0
95
102
# op(op2(x, y)), where x, y, z are constants or variables.
96
- return deg1_l2_ll0_lr0_eval (tree, cX, vals[tree. op], vals[tree. l. op], operators)
103
+ return deg1_l2_ll0_lr0_eval (
104
+ tree, cX, vals[tree. op], vals[tree. l. op], operators, Val (turbo)
105
+ )
97
106
elseif tree. l. degree == 1 && tree. l. l. degree == 0
98
107
# op(op2(x)), where x is a constant or variable.
99
- return deg1_l1_ll0_eval (tree, cX, vals[tree. op], vals[tree. l. op], operators)
108
+ return deg1_l1_ll0_eval (
109
+ tree, cX, vals[tree. op], vals[tree. l. op], operators, Val (turbo)
110
+ )
100
111
else
101
112
# op(x), for any x.
102
- return deg1_eval (tree, cX, vals[tree. op], operators)
113
+ return deg1_eval (tree, cX, vals[tree. op], operators, Val (turbo) )
103
114
end
104
115
elseif tree. degree == 2
105
116
# TODO - add op(op2(x, y), z) and op(x, op2(y, z))
106
117
if tree. l. degree == 0 && tree. r. degree == 0
107
118
# op(x, y), where x, y are constants or variables.
108
- return deg2_l0_r0_eval (tree, cX, vals[tree. op], operators)
119
+ return deg2_l0_r0_eval (tree, cX, vals[tree. op], operators, Val (turbo) )
109
120
elseif tree. l. degree == 0
110
121
# op(x, y), where x is a constant or variable but y is not.
111
- return deg2_l0_eval (tree, cX, vals[tree. op], operators)
122
+ return deg2_l0_eval (tree, cX, vals[tree. op], operators, Val (turbo) )
112
123
elseif tree. r. degree == 0
113
124
# op(x, y), where y is a constant or variable but x is not.
114
- return deg2_r0_eval (tree, cX, vals[tree. op], operators)
125
+ return deg2_r0_eval (tree, cX, vals[tree. op], operators, Val (turbo) )
115
126
else
116
127
# op(x, y), for any x or y
117
- return deg2_eval (tree, cX, vals[tree. op], operators)
128
+ return deg2_eval (tree, cX, vals[tree. op], operators, Val (turbo) )
118
129
end
119
130
end
120
131
end
121
132
122
133
function deg2_eval (
123
- tree:: Node{T} , cX:: AbstractMatrix{T} , :: Val{op_idx} , operators:: OperatorEnum
124
- ):: Tuple{AbstractVector{T},Bool} where {T<: Real ,op_idx}
134
+ tree:: Node{T} ,
135
+ cX:: AbstractMatrix{T} ,
136
+ :: Val{op_idx} ,
137
+ operators:: OperatorEnum ,
138
+ :: Val{turbo} ,
139
+ ):: Tuple{AbstractVector{T},Bool} where {T<: Real ,op_idx,turbo}
125
140
n = size (cX, 2 )
126
- (cumulator, complete) = _eval_tree_array (tree. l, cX, operators)
141
+ (cumulator, complete) = _eval_tree_array (tree. l, cX, operators, Val (turbo) )
127
142
@return_on_false complete cumulator
128
143
@return_on_nonfinite_array cumulator T n
129
- (array2, complete2) = _eval_tree_array (tree. r, cX, operators)
144
+ (array2, complete2) = _eval_tree_array (tree. r, cX, operators, Val (turbo) )
130
145
@return_on_false complete2 cumulator
131
146
@return_on_nonfinite_array array2 T n
132
147
op = operators. binops[op_idx]
133
148
134
149
# We check inputs (and intermediates), not outputs.
135
- @inbounds @simd for j in 1 : n
150
+ @maybe_turbo turbo for j in indices (cumulator)
136
151
x = op (cumulator[j], array2[j]):: T
137
152
cumulator[j] = x
138
153
end
@@ -141,22 +156,26 @@ function deg2_eval(
141
156
end
142
157
143
158
function deg1_eval(
    tree::Node{T},
    cX::AbstractMatrix{T},
    ::Val{op_idx},
    operators::OperatorEnum,
    ::Val{turbo},
)::Tuple{AbstractVector{T},Bool} where {T<:Real,op_idx,turbo}
    # Evaluate `op(x)` where `op` is the unary operator at `op_idx` and `x` is
    # an arbitrary subtree: first evaluate the child, then apply `op` in place.
    n = size(cX, 2)
    (cumulator, complete) = _eval_tree_array(tree.l, cX, operators, Val(turbo))
    # Bail out if the child evaluation failed.
    @return_on_false complete cumulator
    # We check inputs (and intermediates) for NaN/Inf, not outputs.
    @return_on_nonfinite_array cumulator T n
    op = operators.unaops[op_idx]
    # `@maybe_turbo` expands to a `@turbo` loop when `turbo` is true, and to a
    # plain `@inbounds @simd` loop otherwise.
    @maybe_turbo turbo for j in indices(cumulator)
        cumulator[j] = op(cumulator[j])::T
    end
    return (cumulator, true) #
end
157
176
158
177
function deg0_eval (
159
- tree:: Node{T} , cX:: AbstractMatrix{T} , operators :: OperatorEnum
178
+ tree:: Node{T} , cX:: AbstractMatrix{T}
160
179
):: Tuple{AbstractVector{T},Bool} where {T<: Real }
161
180
n = size (cX, 2 )
162
181
if tree. constant
@@ -172,7 +191,8 @@ function deg1_l2_ll0_lr0_eval(
172
191
:: Val{op_idx} ,
173
192
:: Val{op_l_idx} ,
174
193
operators:: OperatorEnum ,
175
- ):: Tuple{AbstractVector{T},Bool} where {T<: Real ,op_idx,op_l_idx}
194
+ :: Val{turbo} ,
195
+ ):: Tuple{AbstractVector{T},Bool} where {T<: Real ,op_idx,op_l_idx,turbo}
176
196
n = size (cX, 2 )
177
197
op = operators. unaops[op_idx]
178
198
op_l = operators. binops[op_l_idx]
@@ -191,7 +211,7 @@ function deg1_l2_ll0_lr0_eval(
191
211
@return_on_check val_ll T n
192
212
feature_lr = tree. l. r. feature
193
213
cumulator = Array {T,1} (undef, n)
194
- @inbounds @simd for j in 1 : n
214
+ @maybe_turbo turbo for j in indices ((cX, cumulator), ( 2 , 1 ))
195
215
x_l = op_l (val_ll, cX[feature_lr, j]):: T
196
216
x = isfinite (x_l) ? op (x_l):: T : T (Inf ) # These will get discovered by _eval_tree_array at end.
197
217
cumulator[j] = x
@@ -202,7 +222,7 @@ function deg1_l2_ll0_lr0_eval(
202
222
val_lr = tree. l. r. val:: T
203
223
@return_on_check val_lr T n
204
224
cumulator = Array {T,1} (undef, n)
205
- @inbounds @simd for j in 1 : n
225
+ @maybe_turbo turbo for j in indices ((cX, cumulator), ( 2 , 1 ))
206
226
x_l = op_l (cX[feature_ll, j], val_lr):: T
207
227
x = isfinite (x_l) ? op (x_l):: T : T (Inf )
208
228
cumulator[j] = x
@@ -212,7 +232,7 @@ function deg1_l2_ll0_lr0_eval(
212
232
feature_ll = tree. l. l. feature
213
233
feature_lr = tree. l. r. feature
214
234
cumulator = Array {T,1} (undef, n)
215
- @inbounds @simd for j in 1 : n
235
+ @maybe_turbo turbo for j in indices ((cX, cumulator), ( 2 , 1 ))
216
236
x_l = op_l (cX[feature_ll, j], cX[feature_lr, j]):: T
217
237
x = isfinite (x_l) ? op (x_l):: T : T (Inf )
218
238
cumulator[j] = x
@@ -228,7 +248,8 @@ function deg1_l1_ll0_eval(
228
248
:: Val{op_idx} ,
229
249
:: Val{op_l_idx} ,
230
250
operators:: OperatorEnum ,
231
- ):: Tuple{AbstractVector{T},Bool} where {T<: Real ,op_idx,op_l_idx}
251
+ :: Val{turbo} ,
252
+ ):: Tuple{AbstractVector{T},Bool} where {T<: Real ,op_idx,op_l_idx,turbo}
232
253
n = size (cX, 2 )
233
254
op = operators. unaops[op_idx]
234
255
op_l = operators. unaops[op_l_idx]
@@ -243,7 +264,7 @@ function deg1_l1_ll0_eval(
243
264
else
244
265
feature_ll = tree. l. l. feature
245
266
cumulator = Array {T,1} (undef, n)
246
- @inbounds @simd for j in 1 : n
267
+ @maybe_turbo turbo for j in indices ((cX, cumulator), ( 2 , 1 ))
247
268
x_l = op_l (cX[feature_ll, j]):: T
248
269
x = isfinite (x_l) ? op (x_l):: T : T (Inf )
249
270
cumulator[j] = x
@@ -253,8 +274,12 @@ function deg1_l1_ll0_eval(
253
274
end
254
275
255
276
function deg2_l0_r0_eval (
256
- tree:: Node{T} , cX:: AbstractMatrix{T} , :: Val{op_idx} , operators:: OperatorEnum
257
- ):: Tuple{AbstractVector{T},Bool} where {T<: Real ,op_idx}
277
+ tree:: Node{T} ,
278
+ cX:: AbstractMatrix{T} ,
279
+ :: Val{op_idx} ,
280
+ operators:: OperatorEnum ,
281
+ :: Val{turbo} ,
282
+ ):: Tuple{AbstractVector{T},Bool} where {T<: Real ,op_idx,turbo}
258
283
n = size (cX, 2 )
259
284
op = operators. binops[op_idx]
260
285
if tree. l. constant && tree. r. constant
@@ -270,7 +295,7 @@ function deg2_l0_r0_eval(
270
295
val_l = tree. l. val:: T
271
296
@return_on_check val_l T n
272
297
feature_r = tree. r. feature
273
- @inbounds @simd for j in 1 : n
298
+ @maybe_turbo turbo for j in indices ((cX, cumulator), ( 2 , 1 ))
274
299
x = op (val_l, cX[feature_r, j]):: T
275
300
cumulator[j] = x
276
301
end
@@ -279,15 +304,15 @@ function deg2_l0_r0_eval(
279
304
feature_l = tree. l. feature
280
305
val_r = tree. r. val:: T
281
306
@return_on_check val_r T n
282
- @inbounds @simd for j in 1 : n
307
+ @maybe_turbo turbo for j in indices ((cX, cumulator), ( 2 , 1 ))
283
308
x = op (cX[feature_l, j], val_r):: T
284
309
cumulator[j] = x
285
310
end
286
311
else
287
312
cumulator = Array {T,1} (undef, n)
288
313
feature_l = tree. l. feature
289
314
feature_r = tree. r. feature
290
- @inbounds @simd for j in 1 : n
315
+ @maybe_turbo turbo for j in indices ((cX, cumulator), ( 2 , 1 ))
291
316
x = op (cX[feature_l, j], cX[feature_r, j]):: T
292
317
cumulator[j] = x
293
318
end
@@ -296,23 +321,27 @@ function deg2_l0_r0_eval(
296
321
end
297
322
298
323
function deg2_l0_eval (
299
- tree:: Node{T} , cX:: AbstractMatrix{T} , :: Val{op_idx} , operators:: OperatorEnum
300
- ):: Tuple{AbstractVector{T},Bool} where {T<: Real ,op_idx}
324
+ tree:: Node{T} ,
325
+ cX:: AbstractMatrix{T} ,
326
+ :: Val{op_idx} ,
327
+ operators:: OperatorEnum ,
328
+ :: Val{turbo} ,
329
+ ):: Tuple{AbstractVector{T},Bool} where {T<: Real ,op_idx,turbo}
301
330
n = size (cX, 2 )
302
- (cumulator, complete) = _eval_tree_array (tree. r, cX, operators)
331
+ (cumulator, complete) = _eval_tree_array (tree. r, cX, operators, Val (turbo) )
303
332
@return_on_false complete cumulator
304
333
@return_on_nonfinite_array cumulator T n
305
334
op = operators. binops[op_idx]
306
335
if tree. l. constant
307
336
val = tree. l. val:: T
308
337
@return_on_check val T n
309
- @inbounds @simd for j in 1 : n
338
+ @maybe_turbo turbo for j in indices (cumulator)
310
339
x = op (val, cumulator[j]):: T
311
340
cumulator[j] = x
312
341
end
313
342
else
314
343
feature = tree. l. feature
315
- @inbounds @simd for j in 1 : n
344
+ @maybe_turbo turbo for j in indices ((cX, cumulator), ( 2 , 1 ))
316
345
x = op (cX[feature, j], cumulator[j]):: T
317
346
cumulator[j] = x
318
347
end
@@ -321,23 +350,27 @@ function deg2_l0_eval(
321
350
end
322
351
323
352
function deg2_r0_eval (
324
- tree:: Node{T} , cX:: AbstractMatrix{T} , :: Val{op_idx} , operators:: OperatorEnum
325
- ):: Tuple{AbstractVector{T},Bool} where {T<: Real ,op_idx}
353
+ tree:: Node{T} ,
354
+ cX:: AbstractMatrix{T} ,
355
+ :: Val{op_idx} ,
356
+ operators:: OperatorEnum ,
357
+ :: Val{turbo} ,
358
+ ):: Tuple{AbstractVector{T},Bool} where {T<: Real ,op_idx,turbo}
326
359
n = size (cX, 2 )
327
- (cumulator, complete) = _eval_tree_array (tree. l, cX, operators)
360
+ (cumulator, complete) = _eval_tree_array (tree. l, cX, operators, Val (turbo) )
328
361
@return_on_false complete cumulator
329
362
@return_on_nonfinite_array cumulator T n
330
363
op = operators. binops[op_idx]
331
364
if tree. r. constant
332
365
val = tree. r. val:: T
333
366
@return_on_check val T n
334
- @inbounds @simd for j in 1 : n
367
+ @maybe_turbo turbo for j in indices (cumulator)
335
368
x = op (cumulator[j], val):: T
336
369
cumulator[j] = x
337
370
end
338
371
else
339
372
feature = tree. r. feature
340
- @inbounds @simd for j in 1 : n
373
+ @maybe_turbo turbo for j in indices ((cX, cumulator), ( 2 , 1 ))
341
374
x = op (cumulator[j], cX[feature, j]):: T
342
375
cumulator[j] = x
343
376
end
0 commit comments