Skip to content

Commit a437638

Browse files
arm_compute v17.04
1 parent 68a98dc commit a437638

File tree

1,746 files changed

+20362
-11113
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,746 files changed

+20362
-11113
lines changed

arm_compute/core/AccessWindowAutoPadding.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,16 @@ class AccessWindowAutoPadding : public IAccessWindow
5858
AccessWindowAutoPadding &operator=(AccessWindowAutoPadding &&) = default;
5959
~AccessWindowAutoPadding() = default;
6060

61+
/** Set the valid region to match the entire tensor. */
6162
void set_valid_region();
6263

64+
/** Return a valid region that spans across the entire tensor. */
65+
ValidRegion compute_valid_region() const;
66+
6367
// Inherited methods overridden:
6468
bool update_window_if_needed(Window &window) const override;
6569
bool update_padding_if_needed(const Window &window) const override;
66-
void set_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) override;
70+
ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
6771

6872
private:
6973
TensorInfo *_info;

arm_compute/core/AccessWindowStatic.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,25 @@ class AccessWindowStatic : public IAccessWindow
6262
AccessWindowStatic &operator=(AccessWindowStatic &&) = default;
6363
~AccessWindowStatic() = default;
6464

65+
/** Set the valid region based on the static access pattern and valid
66+
* region of the inputs.
67+
*
68+
* @param[in] window Execution window of the kernel.
69+
* @param[in] input_valid_region Combined valid region of all inputs.
70+
*/
71+
void set_valid_region(const Window &window, const ValidRegion &input_valid_region);
72+
73+
/** Compute the valid region based on the static access pattern and valid region of the inputs.
74+
*
75+
* @param[in] window Execution window of the kernel.
76+
* @param[in] input_valid_region Combined valid region of all inputs.
77+
*/
78+
ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region) const;
79+
6580
// Inherited methods overridden:
6681
bool update_window_if_needed(Window &window) const override;
6782
bool update_padding_if_needed(const Window &window) const override;
68-
void set_valid_region(const Window &window, ValidRegion input_valid_region);
69-
void set_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) override;
83+
ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
7084

7185
TensorInfo *_info;
7286
int _start_x;

arm_compute/core/AccessWindowTranspose.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ class AccessWindowTranspose : public AccessWindowRectangle
4141
using AccessWindowRectangle::AccessWindowRectangle;
4242
bool update_window_if_needed(Window &window) const override;
4343
bool update_padding_if_needed(const Window &window) const override;
44-
using AccessWindowRectangle::set_valid_region;
45-
void set_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) override;
44+
using AccessWindowRectangle::compute_valid_region;
45+
ValidRegion compute_valid_region(const Window &window, ValidRegion input_valid_region, bool border_undefined, BorderSize border_size) const override;
4646
};
4747
} // namespace arm_compute
4848
#endif /*__ARM_COMPUTE_IACCESS_WINDOW_TRANSPOSE_H__*/

arm_compute/core/CL/CLKernels.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#define __ARM_COMPUTE_CLKERNELS_H__
2626

2727
/* Header regrouping all the CL kernels */
28-
2928
#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
3029
#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
3130
#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
@@ -50,6 +49,7 @@
5049
#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
5150
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
5251
#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
52+
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
5353
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
5454
#include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
5555
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"

arm_compute/core/CL/ICLSimpleKernel.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,13 @@ class ICLSimpleKernel : public ICLKernel
4949

5050
/** Configure the kernel
5151
*
52-
* @param[in] input Source tensor.
53-
* @param[out] output Destination tensor.
54-
* @param[in] processed_elements Number of processed elements per iteration.
55-
* @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant.
56-
* @param[in] border_size (Optional) Size of the border.
52+
* @param[in] input Source tensor.
53+
* @param[out] output Destination tensor.
54+
* @param[in] num_elems_processed_per_iteration Number of processed elements per iteration.
55+
* @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant.
56+
* @param[in] border_size (Optional) Size of the border.
5757
*/
58-
void configure(const ICLTensor *input, ICLTensor *output, unsigned int processed_elements, bool border_undefined = false, const BorderSize &border_size = BorderSize());
58+
void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize());
5959

6060
protected:
6161
const ICLTensor *_input;

arm_compute/core/CL/kernels/CLCannyEdgeKernel.h

Lines changed: 33 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -47,32 +47,28 @@ class CLGradientKernel : public ICLKernel
4747
*
4848
* @note gx, gy and mag must all be the same size (either 16 or 32).
4949
*
50-
* @param[in] gx Source tensor - Gx component. Data types supported: S16/S32.
51-
* @param[in] gy Source tensor - Gy component. Data types supported: Same as gx.
52-
* @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy.
53-
* @param[out] phase Destination tensor - Quantized phase. Data types supported: U8.
54-
* @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm.
55-
* @param[in] num_pixel_to_skip_prev Number of pixels to skip of previous stage if border_mode = UNDEFINED
56-
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
50+
* @param[in] gx Source tensor - Gx component. Data types supported: S16/S32.
51+
* @param[in] gy Source tensor - Gy component. Data types supported: Same as gx.
52+
* @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy.
53+
* @param[out] phase Destination tensor - Quantized phase. Data types supported: U8.
54+
* @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm.
5755
*/
58-
void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type, int32_t num_pixel_to_skip_prev, bool border_undefined);
56+
void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type);
5957

6058
// Inherited methods overridden:
6159
void run(const Window &window, cl::CommandQueue &queue) override;
62-
BorderSize border_size() const override;
6360

6461
private:
65-
const ICLTensor *_gx; /**< Source tensor - Gx component */
66-
const ICLTensor *_gy; /**< Source tensor - Gy component */
67-
ICLTensor *_magnitude; /**< Destination tensor - Magnitude */
68-
ICLTensor *_phase; /**< Destination tensor - Quantized phase */
69-
unsigned int _pixels_to_skip; /**< Pixels to skip around the border. */
62+
const ICLTensor *_gx; /**< Source tensor - Gx component */
63+
const ICLTensor *_gy; /**< Source tensor - Gy component */
64+
ICLTensor *_magnitude; /**< Destination tensor - Magnitude */
65+
ICLTensor *_phase; /**< Destination tensor - Quantized phase */
7066
};
7167

7268
/** OpenCL kernel to perform Non-Maxima suppression for Canny Edge.
7369
*
7470
* @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input
75-
* to characterize points as possible edges.
71+
* to characterize points as possible edges. The output buffer needs to be cleared before this kernel is executed.
7672
*
7773
* @note Hysteresis is computed in @ref CLEdgeTraceKernel
7874
*/
@@ -87,24 +83,22 @@ class CLEdgeNonMaxSuppressionKernel : public ICLKernel
8783
CLEdgeNonMaxSuppressionKernel &operator=(const CLEdgeNonMaxSuppressionKernel &) = delete;
8884
/** Initialise the kernel's sources, destination and border mode.
8985
*
90-
* @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32.
91-
* @param[in] phase Source tensor - Quantized phase. Data types supported: U8.
92-
* @param[out] output Destination tensor
93-
* @param[in] lower_thr Lower threshold.
94-
* @param[in] num_pixel_to_skip_prev Number of pixels to skip of previous stage if border_mode = UNDEFINED
95-
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
86+
* @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32.
87+
* @param[in] phase Source tensor - Quantized phase. Data types supported: U8.
88+
* @param[out] output Destination tensor. Data types supported: U16/U32.
89+
* @param[in] lower_thr Lower threshold.
90+
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
9691
*/
97-
void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, int32_t num_pixel_to_skip_prev, bool border_undefined);
92+
void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined);
9893

9994
// Inherited methods overridden:
10095
void run(const Window &window, cl::CommandQueue &queue) override;
10196
BorderSize border_size() const override;
10297

10398
private:
104-
const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */
105-
const ICLTensor *_phase; /**< Source tensor - Quantized phase. */
106-
ICLTensor *_output; /**< Destination tensor. */
107-
unsigned int _pixels_to_skip; /**< Pixels to skip around the border. */
99+
const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */
100+
const ICLTensor *_phase; /**< Source tensor - Quantized phase. */
101+
ICLTensor *_output; /**< Destination tensor. */
108102
};
109103

110104
/** OpenCL kernel to perform Edge tracing.
@@ -120,28 +114,24 @@ class CLEdgeTraceKernel : public ICLKernel
120114
CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete;
121115
/** Initialise the kernel's source, destination and border mode.
122116
*
123-
* @param[in] input Source tensor. Data types supported: U8.
124-
* @param[out] output Destination tensor. Data types supported: U8.
125-
* @param[in] upper_thr Upper threshold used for the hysteresis
126-
* @param[in] lower_thr Lower threshold used for the hysteresis
127-
* @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32.
128-
* Expected to be initialized to 0 before each run.
129-
* @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32
130-
* Expected to be initialized to 0 before each run.
131-
* @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32.
132-
* Expected to be initialized to 0 before each run.
133-
* @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8.
134-
* Expected to be initialized to 0 before each run.
135-
* @param[in] num_pixel_to_skip_prev Number of pixels to skip of previous stage if border_mode = UNDEFINED.
136-
public: * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
117+
* @param[in] input Source tensor. Data types supported: U8.
118+
* @param[out] output Destination tensor. Data types supported: U8.
119+
* @param[in] upper_thr Upper threshold used for the hysteresis
120+
* @param[in] lower_thr Lower threshold used for the hysteresis
121+
* @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32.
122+
* Expected to be initialized to 0 before each run.
123+
* @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32
124+
* Expected to be initialized to 0 before each run.
125+
* @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32.
126+
* Expected to be initialized to 0 before each run.
127+
* @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8.
128+
* Expected to be initialized to 0 before each run.
137129
*/
138130
void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
139-
ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter,
140-
int32_t num_pixel_to_skip_prev, bool border_undefined);
131+
ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter);
141132

142133
// Inherited methods overridden:
143134
void run(const Window &window, cl::CommandQueue &queue) override;
144-
BorderSize border_size() const override;
145135

146136
private:
147137
const ICLTensor *_input; /**< Source tensor. */
@@ -152,7 +142,6 @@ class CLEdgeTraceKernel : public ICLKernel
152142
ICLTensor *_recorded; /**< Marks recorded elements */
153143
ICLTensor *_l1_stack; /**< L1 hysteresis stack */
154144
ICLTensor *_l1_stack_counter; /**< L1 hysteresis stack counter */
155-
unsigned int _pixels_to_skip; /**< Pixels to skip */
156145
};
157146
}
158147
#endif /* __ARM_COMPUTE_CLCANNYEDGEKERNEL_H__ */

arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,14 @@
2424
#ifndef __ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__
2525
#define __ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__
2626

27-
#include "arm_compute/core/CL/ICLKernel.h"
27+
#include "arm_compute/core/CL/ICLSimpleKernel.h"
2828

2929
namespace arm_compute
3030
{
3131
class ICLTensor;
3232

3333
/** OpenCL kernel to perform a Gaussian filter and half scaling across width (horizontal pass) */
34-
class CLGaussianPyramidHorKernel : public ICLKernel
34+
class CLGaussianPyramidHorKernel : public ICLSimpleKernel
3535
{
3636
public:
3737
/** Default constructor */
@@ -60,12 +60,12 @@ class CLGaussianPyramidHorKernel : public ICLKernel
6060
BorderSize border_size() const override;
6161

6262
private:
63-
const ICLTensor *_input;
64-
ICLTensor *_output;
63+
BorderSize _border_size;
64+
int _l2_load_offset;
6565
};
6666

6767
/** OpenCL kernel to perform a Gaussian filter and half scaling across height (vertical pass) */
68-
class CLGaussianPyramidVertKernel : public ICLKernel
68+
class CLGaussianPyramidVertKernel : public ICLSimpleKernel
6969
{
7070
public:
7171
/** Default constructor */
@@ -94,8 +94,7 @@ class CLGaussianPyramidVertKernel : public ICLKernel
9494
BorderSize border_size() const override;
9595

9696
private:
97-
const ICLTensor *_input;
98-
ICLTensor *_output;
97+
int _t2_load_offset;
9998
};
10099
}
101100
#endif /*__ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H__ */

arm_compute/core/CL/kernels/CLLKTrackerKernel.h

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,16 @@ class CLLKTrackerFinalizeKernel : public ICLKernel
108108
class CLLKTrackerStage0Kernel : public ICLKernel
109109
{
110110
public:
111+
/** Default constructor */
112+
CLLKTrackerStage0Kernel();
113+
/** Prevent instances of this class from being copied (As this class contains pointers) */
114+
CLLKTrackerStage0Kernel(const CLLKTrackerStage0Kernel &) = delete;
115+
/** Prevent instances of this class from being copied (As this class contains pointers) */
116+
CLLKTrackerStage0Kernel &operator=(const CLLKTrackerStage0Kernel &) = delete;
117+
/** Allow instances of this class to be moved */
118+
CLLKTrackerStage0Kernel(CLLKTrackerStage0Kernel &&) = default;
119+
/** Allow instances of this class to be moved */
120+
CLLKTrackerStage0Kernel &operator=(CLLKTrackerStage0Kernel &&) = default;
111121
/** Initialise the kernel input and output
112122
*
113123
* @param[in] old_input Pointer to the input old tensor. Data types supported: U8
@@ -119,21 +129,35 @@ class CLLKTrackerStage0Kernel : public ICLKernel
119129
* @param[out] old_ival Pointer to the array holding internal values
120130
* @param[in] window_dimension The size of the window on which to perform the algorithm
121131
* @param[in] level The pyramid level
122-
* @param[in] border_offset The offset used to define the boundary of the tracked pixels in different border modes
123132
*/
124133
void configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
125134
ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
126135
ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
127-
size_t window_dimension, size_t level, int32_t border_offset);
136+
size_t window_dimension, size_t level);
128137

129138
// Inherited methods overridden:
130139
void run(const Window &window, cl::CommandQueue &queue) override;
140+
141+
private:
142+
const ICLTensor *_old_input;
143+
const ICLTensor *_old_scharr_gx;
144+
const ICLTensor *_old_scharr_gy;
131145
};
132146

133147
/** Interface to run the second stage of LKTracker, where the motion vectors of the given points are computed */
134148
class CLLKTrackerStage1Kernel : public ICLKernel
135149
{
136150
public:
151+
/** Default constructor */
152+
CLLKTrackerStage1Kernel();
153+
/** Prevent instances of this class from being copied (As this class contains pointers) */
154+
CLLKTrackerStage1Kernel(const CLLKTrackerStage1Kernel &) = delete;
155+
/** Prevent instances of this class from being copied (As this class contains pointers) */
156+
CLLKTrackerStage1Kernel &operator=(const CLLKTrackerStage1Kernel &) = delete;
157+
/** Allow instances of this class to be moved */
158+
CLLKTrackerStage1Kernel(CLLKTrackerStage1Kernel &&) = default;
159+
/** Allow instances of this class to be moved */
160+
CLLKTrackerStage1Kernel &operator=(CLLKTrackerStage1Kernel &&) = default;
137161
/** Initialise the kernel input and output
138162
*
139163
* @param[in] new_input Pointer to the input new tensor. Data types supported: U8
@@ -145,13 +169,15 @@ class CLLKTrackerStage1Kernel : public ICLKernel
145169
* @param[in] num_iterations The maximum number of iterations before terminating the algorithm
146170
* @param[in] window_dimension The size of the window on which to perform the algorithm
147171
* @param[in] level The pyramid level
148-
* @param[in] border_offset The offset used to define the boundary of the tracked pixels in different border modes
149172
*/
150173
void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
151-
Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level, int32_t border_offset);
174+
Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level);
152175

153176
// Inherited methods overridden:
154177
void run(const Window &window, cl::CommandQueue &queue) override;
178+
179+
private:
180+
const ICLTensor *_new_input;
155181
};
156182
}
157183
#endif /*__ARM_COMPUTE_CLLKTRACKERKERNEL_H__ */

0 commit comments

Comments
 (0)