Skip to content

Commit 1f3bf6b

Browse files
author
Jenkins
committed
Compute Library v24.11.1
1 parent f44f09d commit 1f3bf6b

File tree

71 files changed

+24018
-25763
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+24018
-25763
lines changed

Android.bp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,8 @@ cc_library_static {
426426
"src/cpu/kernels/CpuDirectConv2dKernel.cpp",
427427
"src/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp",
428428
"src/cpu/kernels/CpuDirectConv3dKernel.cpp",
429+
"src/cpu/kernels/CpuDynamicGemmKernel.cpp",
430+
"src/cpu/kernels/CpuDynamicGemmKernelHeuristics.cpp",
429431
"src/cpu/kernels/CpuElementwiseKernel.cpp",
430432
"src/cpu/kernels/CpuElementwiseUnaryKernel.cpp",
431433
"src/cpu/kernels/CpuFillKernel.cpp",
@@ -609,6 +611,7 @@ cc_library_static {
609611
"src/cpu/operators/CpuDequantize.cpp",
610612
"src/cpu/operators/CpuDirectConv2d.cpp",
611613
"src/cpu/operators/CpuDirectConv3d.cpp",
614+
"src/cpu/operators/CpuDynamicGemm.cpp",
612615
"src/cpu/operators/CpuElementwise.cpp",
613616
"src/cpu/operators/CpuElementwiseUnary.cpp",
614617
"src/cpu/operators/CpuFill.cpp",

CMakeLists.txt

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
2828
list(APPEND CMAKE_MESSAGE_CONTEXT ArmCompute)
2929
project(
3030
ArmCompute
31-
VERSION 43.0.0
31+
VERSION 44.0.0
3232
DESCRIPTION
3333
"The Arm Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A CPU and Arm® Mali™ GPU architectures"
3434
LANGUAGES C CXX ASM)
@@ -333,3 +333,16 @@ if(ARM_COMPUTE_BUILD_EXAMPLES)
333333
endforeach()
334334

335335
endif() # ARM_COMPUTE_BUILD_EXAMPLES
336+
337+
# Install libraries
338+
install(TARGETS arm_compute arm_compute_graph
339+
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
340+
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
341+
342+
# Install test executables
343+
if(ARM_COMPUTE_BUILD_TESTING)
344+
install(TARGETS arm_compute_validation_framework arm_compute_benchmark arm_compute_validation
345+
RUNTIME DESTINATION "${CMAKE_INSTALL_LIBDIR}/tests"
346+
LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}/tests"
347+
ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}/tests")
348+
endif()

README.md

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
<img src="https://raw.githubusercontent.com/ARM-software/ComputeLibrary/gh-pages/ACL_logo.png"/><br><br>
1010
</div>
1111

12-
# Compute Library ![](https://img.shields.io/badge/latest_release-24.11-green)
12+
# Compute Library ![](https://img.shields.io/badge/latest_release-24.11.1-green)
1313

1414

1515
The Compute Library is a collection of low-level machine learning functions optimized for Arm® Cortex®-A, Arm® Neoverse® and Arm® Mali™ GPU architectures.<br>
@@ -37,7 +37,7 @@ Key Features:
3737
<br>
3838

3939
## Documentation
40-
[![Documentation](https://img.shields.io/badge/documentation-24.11-green)](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11/index.xhtml)
40+
[![Documentation](https://img.shields.io/badge/documentation-24.11.1-green)](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11.1/index.xhtml)
4141

4242
> Note: The documentation includes the reference API, changelogs, build guide, contribution guide, errata, etc.
4343
@@ -50,22 +50,22 @@ All the binaries can be downloaded from [here](https://github.com/ARM-software/C
5050

5151
| Platform | Operating System | Release archive (Download) |
5252
| -------------- | ---------------- | -------------------------- |
53-
| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-armv7a-cpu-bin.tar.gz) |
54-
| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-bin.tar.gz) |
55-
| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-gpu-bin.tar.gz) |
56-
| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-gpu-bin.tar.gz) |
53+
| Raspberry Pi 4 | Linux® 32bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-armv7a-cpu-bin.tar.gz) |
54+
| Raspberry Pi 4 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-aarch64-cpu-bin.tar.gz) |
55+
| Odroid N2 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
56+
| HiKey960 | Linux® 64bit | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
5757

5858
<br>
5959

6060
| Architecture | Operating System | Release archive (Download) |
6161
| ------------ | ---------------- | -------------------------- |
62-
| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-armv7a-cpu-gpu-bin.tar.gz) |
63-
| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-android-aarch64-cpu-gpu-bin.tar.gz) |
64-
| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11/arm_compute-v24.11-linux-aarch64-cpu-gpu-bin.tar.gz) |
62+
| armv7 | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-armv7a-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-armv7a-cpu-gpu-bin.tar.gz) |
63+
| arm64-v8a | Android™ | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-android-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-android-aarch64-cpu-gpu-bin.tar.gz) |
64+
| arm64-v8a | Linux® | [![](https://img.shields.io/badge/build-neon-orange)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-aarch64-cpu-bin.tar.gz) [![](https://img.shields.io/badge/build-neon+cl-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/download/v24.11.1/arm_compute-v24.11.1-linux-aarch64-cpu-gpu-bin.tar.gz) |
6565

6666
<br>
6767

68-
Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.11-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.11)
68+
Please refer to the following link for more pre-built binaries: [![](https://img.shields.io/badge/v24.11.1-bins-yellowgreen)](https://github.com/ARM-software/ComputeLibrary/releases/tag/v24.11.1)
6969

7070
Pre-build binaries are generated with the following security / good coding practices related flags:
7171
> -Wall, -Wextra, -Wformat=2, -Winit-self, -Wstrict-overflow=2, -Wswitch-default, -Woverloaded-virtual, -Wformat-security, -Wctor-dtor-privacy, -Wsign-promo, -Weffc++, -pedantic, -fstack-protector-strong
@@ -107,13 +107,13 @@ Pre-build binaries are generated with the following security / good coding pract
107107

108108
## Experimental builds
109109

110-
**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11/how_to_build.xhtml) for more details.
110+
**⚠ Important** Bazel and CMake builds are experimental CPU only builds, please see the [documentation](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11.1/how_to_build.xhtml) for more details.
111111

112112
<br>
113113

114114
## How to contribute
115115

116-
Contributions to the Compute Library are more than welcome. If you are interested on contributing, please have a look at our [how to contribute guidelines](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11/contribution_guidelines.xhtml).
116+
Contributions to the Compute Library are more than welcome. If you are interested in contributing, please have a look at our [how to contribute guidelines](https://artificial-intelligence.sites.arm.com/computelibrary/v24.11.1/contribution_guidelines.xhtml).
117117

118118
### Developer Certificate of Origin (DCO)
119119
Before the Compute Library accepts your contribution, you need to certify its origin and give us your permission. To manage this process we use the Developer Certificate of Origin (DCO) V1.1 (https://developercertificate.org/)

SConscript

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ import codecs
3333
import platform
3434
import SCons
3535

36-
VERSION = "v24.11"
37-
LIBRARY_VERSION_MAJOR = 43
36+
VERSION = "v24.11.1"
37+
LIBRARY_VERSION_MAJOR = 44
3838
LIBRARY_VERSION_MINOR = 0
3939
LIBRARY_VERSION_PATCH = 0
4040
SONAME_VERSION = str(LIBRARY_VERSION_MAJOR) + "." + str(LIBRARY_VERSION_MINOR) + "." + str(LIBRARY_VERSION_PATCH)

arm_compute/core/TensorShape.h

Lines changed: 46 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2021, 2023 Arm Limited.
2+
* Copyright (c) 2016-2021, 2023-2024 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -21,8 +21,8 @@
2121
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2222
* SOFTWARE.
2323
*/
24-
#ifndef ARM_COMPUTE_TENSORSHAPE_H
25-
#define ARM_COMPUTE_TENSORSHAPE_H
24+
#ifndef ACL_ARM_COMPUTE_CORE_TENSORSHAPE_H
25+
#define ACL_ARM_COMPUTE_CORE_TENSORSHAPE_H
2626

2727
#include "arm_compute/core/Dimensions.h"
2828
#include "arm_compute/core/Error.h"
@@ -35,7 +35,12 @@
3535

3636
namespace arm_compute
3737
{
38-
/** Shape of a tensor */
38+
/** Shape of a tensor.
39+
*
40+
* It is allowed to set one or several dimensions of a tensor shape to size 0.
41+
* In this case the dimensions of size 0 and the whole tensor shape are
42+
* considered dynamic.
43+
*/
3944
class TensorShape : public Dimensions<size_t>
4045
{
4146
public:
@@ -77,26 +82,17 @@ class TensorShape : public Dimensions<size_t>
7782
*/
7883
TensorShape &set(size_t dimension, size_t value, bool apply_dim_correction = true, bool increase_dim_unit = true)
7984
{
80-
// Clear entire shape if one dimension is zero
81-
if (value == 0)
82-
{
83-
_num_dimensions = 0;
84-
std::fill(_id.begin(), _id.end(), 0);
85-
}
86-
else
87-
{
88-
// Make sure all empty dimensions are filled with 1
89-
std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
85+
// Make sure all empty dimensions are filled with 1
86+
std::fill(_id.begin() + _num_dimensions, _id.end(), 1);
9087

91-
// Set the specified dimension and increase the number of dimensions if
92-
// necessary
93-
Dimensions::set(dimension, value, increase_dim_unit);
88+
// Set the specified dimension and increase the number of dimensions if
89+
// necessary
90+
Dimensions::set(dimension, value, increase_dim_unit);
9491

95-
// Correct number dimensions to ignore trailing dimensions of size 1
96-
if (apply_dim_correction)
97-
{
98-
apply_dimension_correction();
99-
}
92+
// Correct number dimensions to ignore trailing dimensions of size 1
93+
if (apply_dim_correction)
94+
{
95+
apply_dimension_correction();
10096
}
10197
return *this;
10298
}
@@ -244,6 +240,33 @@ class TensorShape : public Dimensions<size_t>
244240
return bc_shape;
245241
}
246242

243+
/** Check if the tensor shape is dynamic.
244+
*
245+
* If any dimension of the tensor shape has size 0, then this dimension
246+
* and the whole shape are considered dynamic.
247+
*
248+
* @return True if the tensor shape is dynamic, else false.
249+
*/
250+
bool is_dynamic() const
251+
{
252+
return std::any_of(cbegin(), cend(), [](const auto &s) { return s == 0; });
253+
}
254+
255+
/** Check if a given dimension of the tensor shape is dynamic.
256+
*
257+
* If a dimension of the tensor shape has size 0, then this dimension
258+
* and the whole shape are considered dynamic.
259+
*
260+
* @param[in] dim Dimension index.
261+
*
262+
* @return True if dimension dim is dynamic, else false.
263+
*/
264+
bool is_dynamic(const size_t dim) const
265+
{
266+
ARM_COMPUTE_ERROR_ON(dim >= TensorShape::num_max_dimensions);
267+
return _id[dim] == 0;
268+
}
269+
247270
private:
248271
/** Remove trailing dimensions of size 1 from the reported number of dimensions. */
249272
void apply_dimension_correction()
@@ -262,4 +285,4 @@ class TensorShape : public Dimensions<size_t>
262285
}
263286
};
264287
} // namespace arm_compute
265-
#endif /*ARM_COMPUTE_TENSORSHAPE_H*/
288+
#endif // ACL_ARM_COMPUTE_CORE_TENSORSHAPE_H

arm_compute/runtime/experimental/low_level/CpuGemmAssemblyDispatch.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,11 @@ class CpuGemmAssemblyDispatch : arm_compute::experimental::IOperator
149149
const ITensorInfo *d,
150150
const GEMMInfo &gemm_info = GEMMInfo());
151151

152+
/** Indicates whether or not there is a stateless implementation for the configured GEMM
153+
* @return a bool: true if the implementation is stateless; false if not.
154+
*/
155+
bool has_stateless_impl() const;
156+
152157
/** Checks if activation is supported by the gemm assembly dispatcher
153158
*
154159
* @param[in] activation Activation to check

docs/Doxyfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ PROJECT_NAME = "Compute Library"
6060
# could be handy for archiving the generated documentation or if some version
6161
# control system is used.
6262

63-
PROJECT_NUMBER = 24.11
63+
PROJECT_NUMBER = 24.11.1
6464

6565
# Using the PROJECT_BRIEF tag one can provide an optional one line description
6666
# for a project that appears at the top of each page and should give viewer a

filelist.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1581,6 +1581,8 @@
15811581
"files": {
15821582
"common": [
15831583
"src/cpu/kernels/CpuConvertQuantizedSignednessKernel.cpp",
1584+
"src/cpu/kernels/CpuDynamicGemmKernel.cpp",
1585+
"src/cpu/kernels/CpuDynamicGemmKernelHeuristics.cpp",
15841586
"src/cpu/kernels/CpuGemmMatrixAdditionKernel.cpp",
15851587
"src/cpu/kernels/CpuGemmMatrixMultiplyKernel.cpp",
15861588
"src/cpu/kernels/CpuGemmTranspose1xWKernel.cpp",
@@ -1593,6 +1595,7 @@
15931595
"src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.cpp",
15941596
"src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.cpp",
15951597
"src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.cpp",
1598+
"src/cpu/operators/CpuDynamicGemm.cpp",
15961599
"src/cpu/operators/CpuGemm.cpp",
15971600
"src/cpu/operators/CpuGemmLowpOutputStage.cpp",
15981601
"src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp",

src/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,8 @@ filegroup(
709709
"cpu/kernels/CpuDirectConv2dKernel.cpp",
710710
"cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp",
711711
"cpu/kernels/CpuDirectConv3dKernel.cpp",
712+
"cpu/kernels/CpuDynamicGemmKernel.cpp",
713+
"cpu/kernels/CpuDynamicGemmKernelHeuristics.cpp",
712714
"cpu/kernels/CpuElementwiseKernel.cpp",
713715
"cpu/kernels/CpuElementwiseUnaryKernel.cpp",
714716
"cpu/kernels/CpuFillKernel.cpp",
@@ -892,6 +894,7 @@ filegroup(
892894
"cpu/operators/CpuDequantize.cpp",
893895
"cpu/operators/CpuDirectConv2d.cpp",
894896
"cpu/operators/CpuDirectConv3d.cpp",
897+
"cpu/operators/CpuDynamicGemm.cpp",
895898
"cpu/operators/CpuElementwise.cpp",
896899
"cpu/operators/CpuElementwiseUnary.cpp",
897900
"cpu/operators/CpuFill.cpp",

src/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,6 +700,8 @@ target_sources(
700700
cpu/kernels/CpuDirectConv2dKernel.cpp
701701
cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp
702702
cpu/kernels/CpuDirectConv3dKernel.cpp
703+
cpu/kernels/CpuDynamicGemmKernel.cpp
704+
cpu/kernels/CpuDynamicGemmKernelHeuristics.cpp
703705
cpu/kernels/CpuElementwiseKernel.cpp
704706
cpu/kernels/CpuElementwiseUnaryKernel.cpp
705707
cpu/kernels/CpuFillKernel.cpp
@@ -883,6 +885,7 @@ target_sources(
883885
cpu/operators/CpuDequantize.cpp
884886
cpu/operators/CpuDirectConv2d.cpp
885887
cpu/operators/CpuDirectConv3d.cpp
888+
cpu/operators/CpuDynamicGemm.cpp
886889
cpu/operators/CpuElementwise.cpp
887890
cpu/operators/CpuElementwiseUnary.cpp
888891
cpu/operators/CpuFill.cpp

0 commit comments

Comments
 (0)