Skip to content

Commit 8981848

Browse files
committed
CANN: Use smart pointers to manage ACL objects
Previously, ACL objects were destroyed manually, which led to multiple memory leaks at runtime. This patch replaces manual memory management with smart pointers so that ACL objects are properly released and ownership is clearly defined. Note that an ACL object is owned by the function that creates it; other internal functions should operate on these ACL objects through raw pointers to avoid unintended ownership transfers. Additionally, since an aclTensorList automatically frees the aclTensor objects it contains, the smart pointer of any aclTensor added to a tensor list must release ownership to avoid a double free. This PR also removes the asynchronous task submission mechanism. Due to changes in recent CANN versions, tiling time has decreased significantly. Even with a dual-thread submission model, the dispatch overhead still falls on the critical path, making async submission less beneficial. Moreover, aclGraph support provides a much better path to reducing operator dispatch latency.
1 parent 97d5117 commit 8981848

File tree

7 files changed

+810
-3659
lines changed

7 files changed

+810
-3659
lines changed

ggml/src/ggml-cann/Doxyfile

Lines changed: 0 additions & 2579 deletions
This file was deleted.

ggml/src/ggml-cann/acl_tensor.cpp

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,14 @@ aclDataType ggml_cann_type_mapping(ggml_type type) {
4848
default:
4949
return ACL_DT_UNDEFINED;
5050
}
51-
return ACL_DT_UNDEFINED;
5251
}
5352

54-
aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor,
55-
int64_t * ne,
56-
size_t * nb,
57-
int64_t dims,
58-
aclFormat format,
59-
size_t offset) {
53+
acl_tensor_ptr ggml_cann_create_tensor(const ggml_tensor * tensor,
54+
int64_t * ne,
55+
size_t * nb,
56+
int64_t dims,
57+
aclFormat format,
58+
size_t offset) {
6059
// If the tensor is broadcast, up to GGML_MAX_DIMS additional dimensions will be
6160
// added.
6261
int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
@@ -87,10 +86,20 @@ aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor,
8786
std::reverse(acl_ne, acl_ne + final_dims);
8887
std::reverse(acl_stride, acl_stride + final_dims);
8988

90-
aclTensor * acl_tensor = aclCreateTensor(acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
91-
elem_offset, format, &acl_storage_len, 1, tensor->data);
89+
aclTensor * raw = aclCreateTensor(acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride, elem_offset,
90+
format, &acl_storage_len, 1, tensor->data);
9291

93-
return acl_tensor;
92+
return acl_tensor_ptr(raw);
93+
}
94+
95+
// Builds an aclIntArray from `size` int64_t values via aclCreateIntArray() and
// hands the raw pointer to an acl_int_array_ptr, which becomes its owner.
acl_int_array_ptr ggml_cann_create_int_array(const int64_t * value, uint64_t size) {
96+
aclIntArray * raw = aclCreateIntArray(value, size);
97+
return acl_int_array_ptr(raw);
98+
}
99+
100+
// Builds an aclScalar from the memory at `value`, interpreted as `dataType`, via
// aclCreateScalar() and hands the raw pointer to an acl_scalar_ptr, which becomes its owner.
acl_scalar_ptr ggml_cann_create_scalar(void * value, aclDataType dataType) {
101+
aclScalar * raw = aclCreateScalar(value, dataType);
102+
return acl_scalar_ptr(raw);
94103
}
95104

96105
bool ggml_cann_need_bcast(const ggml_tensor * t0, const ggml_tensor * t1) {

ggml/src/ggml-cann/acl_tensor.h

Lines changed: 126 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,12 @@
2323
#ifndef CANN_ACL_TENSOR_H
2424
#define CANN_ACL_TENSOR_H
2525

26-
#include <algorithm>
27-
#include <cstring>
26+
#include "common.h"
2827

2928
#include <aclnn/aclnn_base.h>
30-
#include "common.h"
29+
30+
#include <algorithm>
31+
#include <cstring>
3132

3233
/**
3334
* @brief Maps a ggml_type to its corresponding aclDataType.
@@ -43,6 +44,47 @@
4344
*/
4445
aclDataType ggml_cann_type_mapping(ggml_type type);
4546

47+
// Deleter for aclTensor: passes non-null pointers to aclDestroyTensor() (wrapped in ACL_CHECK); null is ignored.
48+
struct acl_tensor_deleter {
49+
void operator()(aclTensor * ptr) const noexcept {
50+
if (ptr != nullptr) {
51+
ACL_CHECK(aclDestroyTensor(ptr));
52+
}
53+
}
54+
};
55+
56+
// Deleter for aclIntArray: passes non-null pointers to aclDestroyIntArray() (wrapped in ACL_CHECK); null is ignored.
57+
struct acl_int_array_deleter {
58+
void operator()(aclIntArray * ptr) const noexcept {
59+
if (ptr != nullptr) {
60+
ACL_CHECK(aclDestroyIntArray(ptr));
61+
}
62+
}
63+
};
64+
65+
// Deleter for aclScalar: passes non-null pointers to aclDestroyScalar() (wrapped in ACL_CHECK); null is ignored.
66+
struct acl_scalar_deleter {
67+
void operator()(aclScalar * ptr) const noexcept {
68+
if (ptr != nullptr) {
69+
ACL_CHECK(aclDestroyScalar(ptr));
70+
}
71+
}
72+
};
73+
74+
// Deleter for aclTensorList: passes non-null pointers to aclDestroyTensorList() (wrapped in ACL_CHECK); null is ignored.
75+
struct acl_tensor_list_deleter {
76+
void operator()(aclTensorList * ptr) const noexcept {
77+
if (ptr != nullptr) {
78+
ACL_CHECK(aclDestroyTensorList(ptr));
79+
}
80+
}
81+
};
82+
83+
// Owning handles for ACL objects: each alias is a std::unique_ptr paired with the
// deleter struct for that ACL type, so the object is destroyed when the handle goes away.
using acl_tensor_ptr = std::unique_ptr<aclTensor, acl_tensor_deleter>;
84+
using acl_int_array_ptr = std::unique_ptr<aclIntArray, acl_int_array_deleter>;
85+
using acl_scalar_ptr = std::unique_ptr<aclScalar, acl_scalar_deleter>;
86+
using acl_tensor_list_ptr = std::unique_ptr<aclTensorList, acl_tensor_list_deleter>;
87+
4688
/**
4789
* @brief Creates an ACL tensor from a ggml_tensor with optional shape.
4890
*
@@ -62,12 +104,12 @@ aclDataType ggml_cann_type_mapping(ggml_type type);
62104
* @param offset Offset in bytes for the ACL tensor data. Defaults to 0.
63105
* @return Smart pointer (acl_tensor_ptr) owning the created ACL tensor.
64106
*/
65-
aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor,
66-
int64_t * ne = nullptr,
67-
size_t * nb = nullptr,
68-
int64_t dims = 0,
69-
aclFormat format = ACL_FORMAT_ND,
70-
size_t offset = 0);
107+
acl_tensor_ptr ggml_cann_create_tensor(const ggml_tensor * tensor,
108+
int64_t * ne = nullptr,
109+
size_t * nb = nullptr,
110+
int64_t dims = 0,
111+
aclFormat format = ACL_FORMAT_ND,
112+
size_t offset = 0);
71113

72114
/**
73115
* @brief Template for creating an ACL tensor from provided parameters. typename TYPE
@@ -90,14 +132,14 @@ aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor,
90132
* @return Smart pointer (acl_tensor_ptr) owning the created ACL tensor.
91133
*/
92134
template <typename TYPE>
93-
aclTensor * ggml_cann_create_tensor(void * data_ptr,
94-
aclDataType dtype,
95-
TYPE type_size,
96-
int64_t * ne,
97-
TYPE * nb,
98-
int64_t dims,
99-
aclFormat format = ACL_FORMAT_ND,
100-
size_t offset = 0) {
135+
acl_tensor_ptr ggml_cann_create_tensor(void * data_ptr,
136+
aclDataType dtype,
137+
TYPE type_size,
138+
int64_t * ne,
139+
TYPE * nb,
140+
int64_t dims,
141+
aclFormat format = ACL_FORMAT_ND,
142+
size_t offset = 0) {
101143
int64_t tmp_ne[GGML_MAX_DIMS * 2];
102144
int64_t tmp_stride[GGML_MAX_DIMS * 2];
103145

@@ -114,10 +156,75 @@ aclTensor * ggml_cann_create_tensor(void * data_ptr,
114156
std::reverse(tmp_ne, tmp_ne + dims);
115157
std::reverse(tmp_stride, tmp_stride + dims);
116158

117-
aclTensor * acl_tensor =
159+
aclTensor * raw =
118160
aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size, format, &acl_storage_len, 1, data_ptr);
119161

120-
return acl_tensor;
162+
return acl_tensor_ptr(raw);
163+
}
164+
165+
/**
166+
* @brief Create an ACL int array resource wrapped in a smart pointer.
167+
*
168+
* This function constructs an aclIntArray from the provided int64_t values
169+
* and returns it as an acl_int_array_ptr (a std::unique_ptr with a custom
170+
* deleter). The returned pointer owns the ACL resource and will automatically
171+
* destroy it via aclDestroyIntArray().
172+
*
173+
* @param value Pointer to the int64_t elements.
174+
* @param size Number of elements in value.
175+
*
176+
* @return A smart pointer managing the created ACL int array.
177+
*/
178+
acl_int_array_ptr ggml_cann_create_int_array(const int64_t * value, uint64_t size);
179+
180+
/**
181+
* @brief Create an ACL scalar resource wrapped in a smart pointer.
182+
*
183+
* This function constructs an aclScalar from the raw value pointer and ACL
184+
* data type, then returns it as an acl_scalar_ptr (a std::unique_ptr with
185+
* a custom deleter). The returned pointer owns the ACL scalar and will
186+
* automatically destroy it via aclDestroyScalar().
187+
*
188+
* @param value Pointer to the raw scalar memory.
189+
* @param dataType ACL data type of the scalar.
190+
*
191+
* @return A smart pointer managing the created ACL scalar.
192+
*/
193+
acl_scalar_ptr ggml_cann_create_scalar(void * value, aclDataType dataType);
194+
195+
/**
196+
* @brief Create an ACL tensor list from multiple tensor smart pointers.
197+
*
198+
* This function accepts a variadic list of acl_tensor_ptr (a unique_ptr with
199+
* custom deleter) and produces an aclTensorList using aclCreateTensorList().
200+
*
201+
* The lifecycle management of the tensor objects changes as follows:
202+
* - aclCreateTensorList() takes ownership of the tensors
203+
* - Each input smart pointer releases ownership using release()
204+
* - As a result, the tensors will NOT be destroyed by unique_ptr
205+
* - Instead, they will be destroyed when aclDestroyTensorList() is called
206+
*
207+
* This ensures correct ownership transfer and prevents double-free situations.
208+
*
209+
* @tparam acl_tensor_ptr Variadic template parameter; each argument must be
210+
* a unique_ptr-like type supporting get() and release().
211+
*
212+
* @param tensors Variadic list of acl_tensor_ptr objects. Ownership of
213+
* each tensor is transferred away from these smart pointers.
214+
*
215+
* @return A smart pointer (acl_tensor_list_ptr) owning the created ACL tensor list.
216+
*
217+
* @note This implementation is C++11 compatible. The ownership-release process is
218+
* executed using a pack expansion inside an initializer list.
219+
*/
220+
// Note: the template parameter pack is named acl_tensor_ptr, so inside this function
// that name refers to the deduced argument types rather than the unique_ptr alias.
template <typename... acl_tensor_ptr> acl_tensor_list_ptr ggml_cann_create_tensor_list(acl_tensor_ptr &&... tensors) {
221+
// Gather the raw pointers; the smart pointers still own the tensors at this point.
aclTensor * raw_tensors[] = { tensors.get()... };
222+
aclTensorList * raw = aclCreateTensorList(raw_tensors, sizeof...(tensors));
223+
// The aclTensor objects are released by the aclTensorList, so give up ownership here without
224+
// destroying the tensors (this avoids a double free).
// NOTE(review): release() runs even if aclCreateTensorList() returned nullptr — confirm
// whether that path can leak the tensors.
225+
// Pack expansion inside an initializer list calls release() on every argument.
int dummy[] = { (tensors.release(), 0)... };
226+
GGML_UNUSED(dummy);
227+
return acl_tensor_list_ptr(raw);
121228
}
122229

123230
/**

0 commit comments

Comments
 (0)