215 changes: 215 additions & 0 deletions backends/aoti/common_shims_slim.h
@@ -0,0 +1,215 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <executorch/backends/aoti/export.h>
#include <executorch/runtime/core/error.h>
#include <algorithm>
#include <cstdint>
#include <unordered_map>
#include <vector>

// Uses conditional compilation to separate the implementation between
// CUDA backend (SlimTensor) and other backends like MPS (ETensor).
// The caller determines which path is used by defining CUDA_AVAILABLE.
#ifdef CUDA_AVAILABLE
#include <executorch/backends/aoti/slim/core/SlimTensor.h>
#else
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#endif

namespace executorch {
namespace backends {
namespace aoti {

// Common using declarations for ExecuTorch types
using executorch::runtime::Error;

// ============================================================
// Tensor Type Definition - branched based on CUDA_AVAILABLE
// ============================================================
#ifdef CUDA_AVAILABLE
using Tensor = executorch::backends::aoti::slim::SlimTensor;
#else
using Tensor = executorch::runtime::etensor::Tensor;
#endif

// Common AOTI type aliases
using AOTIRuntimeError = Error;
using AOTITorchError = Error;

#ifndef CUDA_AVAILABLE
namespace internal {
// Global storage for tensor metadata (ETensor path only)
// SlimTensor stores sizes/strides directly in int64_t[] - no caching needed
inline std::unordered_map<Tensor*, std::vector<int64_t>>& tensor_to_sizes() {
static std::unordered_map<Tensor*, std::vector<int64_t>> instance;
return instance;
}
inline std::unordered_map<Tensor*, std::vector<int64_t>>& tensor_to_strides() {
static std::unordered_map<Tensor*, std::vector<int64_t>> instance;
return instance;
}
} // namespace internal
#endif

// ============================================================
// Basic Property Getters - Inline implementations
// ============================================================

inline AOTITorchError aoti_torch_get_data_ptr(
Tensor* tensor,
void** ret_data_ptr) {
if (tensor == nullptr) {
return Error::InvalidArgument;
}
if (ret_data_ptr == nullptr) {
return Error::InvalidArgument;
}

#ifdef CUDA_AVAILABLE
*ret_data_ptr = tensor->data_ptr();
#else
*ret_data_ptr = tensor->mutable_data_ptr();
#endif
return Error::Ok;
}

inline AOTITorchError aoti_torch_get_sizes(
Tensor* tensor,
int64_t** ret_sizes) {
if (tensor == nullptr) {
return Error::InvalidArgument;
}
if (ret_sizes == nullptr) {
return Error::InvalidArgument;
}

#ifdef CUDA_AVAILABLE
// SlimTensor stores sizes directly in int64_t[] - no caching needed
*ret_sizes = const_cast<int64_t*>(tensor->sizes().data());
#else
auto it = internal::tensor_to_sizes().find(tensor);
bool needs_update = false;

if (it == internal::tensor_to_sizes().end()) {
needs_update = true;
} else {
// Validate cached metadata matches current tensor state
auto tensor_sizes = tensor->sizes();
needs_update = !std::equal(
it->second.begin(),
it->second.end(),
tensor_sizes.begin(),
tensor_sizes.end());
}

if (needs_update) {
std::vector<int64_t> sizes(tensor->dim());
auto tensor_sizes = tensor->sizes();
for (int i = 0; i < tensor->dim(); i++) {
sizes[i] = tensor_sizes[i];
}
it = internal::tensor_to_sizes()
.insert_or_assign(tensor, std::move(sizes))
.first;
}

// For 0-D tensors the cached vector is empty and data() may be nullptr,
// so return a stable placeholder instead.
if (it->second.empty()) {
static int64_t empty_sizes_placeholder = 0;
*ret_sizes = &empty_sizes_placeholder;
} else {
*ret_sizes = it->second.data();
}
#endif
return Error::Ok;
}

inline AOTITorchError aoti_torch_get_strides(
Tensor* tensor,
int64_t** ret_strides) {
if (tensor == nullptr) {
return Error::InvalidArgument;
}
if (ret_strides == nullptr) {
return Error::InvalidArgument;
}

#ifdef CUDA_AVAILABLE
// SlimTensor stores strides directly in int64_t[] - no caching needed
*ret_strides = const_cast<int64_t*>(tensor->strides().data());
#else
auto it = internal::tensor_to_strides().find(tensor);
bool needs_update = false;

if (it == internal::tensor_to_strides().end()) {
needs_update = true;
} else {
// Validate cached metadata matches current tensor state
auto tensor_strides = tensor->strides();
needs_update = !std::equal(
it->second.begin(),
it->second.end(),
tensor_strides.begin(),
tensor_strides.end());
}

if (needs_update) {
std::vector<int64_t> strides(tensor->dim());
auto tensor_strides = tensor->strides();
for (int i = 0; i < tensor->dim(); i++) {
strides[i] = tensor_strides[i];
}
it = internal::tensor_to_strides()
.insert_or_assign(tensor, std::move(strides))
.first;
}

// For 0-D tensors the cached vector is empty and data() may be nullptr,
// so return a stable placeholder instead.
if (it->second.empty()) {
static int64_t empty_strides_placeholder = 0;
*ret_strides = &empty_strides_placeholder;
} else {
*ret_strides = it->second.data();
}
#endif
return Error::Ok;
}

inline AOTITorchError aoti_torch_get_dtype(Tensor* tensor, int32_t* ret_dtype) {
if (tensor == nullptr) {
return Error::InvalidArgument;
}
if (ret_dtype == nullptr) {
return Error::InvalidArgument;
}

#ifdef CUDA_AVAILABLE
*ret_dtype = static_cast<int32_t>(tensor->dtype());
#else
*ret_dtype = static_cast<int32_t>(tensor->scalar_type());
#endif
return Error::Ok;
}

inline AOTITorchError aoti_torch_get_dim(Tensor* tensor, int64_t* ret_dim) {
if (tensor == nullptr) {
return Error::InvalidArgument;
}
if (ret_dim == nullptr) {
return Error::InvalidArgument;
}

*ret_dim = static_cast<int64_t>(tensor->dim());
return Error::Ok;
}

} // namespace aoti
} // namespace backends
} // namespace executorch
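
A minimal usage sketch of the getter API above, assuming the caller already holds a `Tensor*` from the active backend; `read_tensor_metadata` is a hypothetical helper, not part of this change:

```cpp
#include <executorch/backends/aoti/common_shims_slim.h>

#include <cstdint>

namespace aoti = executorch::backends::aoti;

// Reads the basic properties of a tensor through the shim API.
// Every getter uses out-parameters and returns Error::Ok on success.
aoti::AOTITorchError read_tensor_metadata(aoti::Tensor* tensor) {
  int64_t dim = 0;
  auto err = aoti::aoti_torch_get_dim(tensor, &dim);
  if (err != aoti::Error::Ok) {
    return err;
  }

  int64_t* sizes = nullptr;
  int64_t* strides = nullptr;
  int32_t dtype = 0;
  void* data = nullptr;

  // Each call validates both the tensor and the out-pointer.
  err = aoti::aoti_torch_get_sizes(tensor, &sizes);
  if (err != aoti::Error::Ok) {
    return err;
  }
  err = aoti::aoti_torch_get_strides(tensor, &strides);
  if (err != aoti::Error::Ok) {
    return err;
  }
  err = aoti::aoti_torch_get_dtype(tensor, &dtype);
  if (err != aoti::Error::Ok) {
    return err;
  }
  err = aoti::aoti_torch_get_data_ptr(tensor, &data);
  if (err != aoti::Error::Ok) {
    return err;
  }

  // sizes/strides point at dim elements (or a placeholder for 0-D tensors).
  (void)dim;
  (void)dtype;
  (void)data;
  return aoti::Error::Ok;
}
```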
18 changes: 18 additions & 0 deletions backends/aoti/targets.bzl
@@ -86,3 +86,21 @@ def define_common_targets():
":delegate_handle",
],
)

# SlimTensor-based common shims (header-only library)
# The caller determines which tensor type is used by defining CUDA_AVAILABLE.
# - With CUDA_AVAILABLE=1: Uses SlimTensor
# - Without CUDA_AVAILABLE: Uses ETensor
runtime.cxx_library(
name = "common_shims_slim",
headers = [
"common_shims_slim.h",
"export.h",
],
visibility = ["@EXECUTORCH_CLIENTS"],
deps = [
"//executorch/runtime/core:core",
"//executorch/runtime/core/exec_aten:lib",
"//executorch/backends/aoti/slim/core:slimtensor",
],
)
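
Since the library itself does not set the flag, a consumer that wants the SlimTensor path must define `CUDA_AVAILABLE` in its own target. A hypothetical translation unit in such a target could verify the selection at compile time (this snippet is illustrative, not part of the change):

```cpp
// Hypothetical TU in a target compiled with -DCUDA_AVAILABLE=1 that
// depends on :common_shims_slim.
#include <executorch/backends/aoti/common_shims_slim.h>

#include <type_traits>

// With CUDA_AVAILABLE defined, Tensor aliases SlimTensor; without it,
// Tensor aliases the ETensor runtime tensor instead.
static_assert(
    std::is_same<
        executorch::backends::aoti::Tensor,
        executorch::backends::aoti::slim::SlimTensor>::value,
    "CUDA_AVAILABLE must select the SlimTensor path");
```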
25 changes: 25 additions & 0 deletions backends/aoti/tests/TARGETS
@@ -1,4 +1,5 @@
load("@fbcode_macros//build_defs:cpp_unittest.bzl", "cpp_unittest")
load("@fbcode_macros//build_defs/lib:re_test_utils.bzl", "re_test_utils")

oncall("executorch")

@@ -20,3 +21,27 @@ cpp_unittest(
"//executorch/extension/tensor:tensor",
],
)

cpp_unittest(
name = "test_common_shims_slim",
srcs = [
"test_common_shims_slim.cpp",
],
deps = [
"//executorch/backends/aoti:common_shims_slim",
"//executorch/backends/aoti/slim/core:slimtensor",
"//executorch/backends/aoti/slim/factory:empty",
"//executorch/runtime/core:core",
"//executorch/runtime/platform:platform",
],
external_deps = [
("cuda", None, "cuda-lazy"),
],
preprocessor_flags = [
"-DCUDA_AVAILABLE=1",
],
keep_gpu_sections = True,
remote_execution = re_test_utils.remote_execution(
platform = "gpu-remote-execution",
),
)
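
The test source (test_common_shims_slim.cpp) is not shown in this diff. A minimal sketch of the kind of null-argument checks it could contain, using only the shims defined above, might look like the following; constructing real SlimTensors via the factory dependency is omitted since that API is not part of this change:

```cpp
#include <executorch/backends/aoti/common_shims_slim.h>

#include <gtest/gtest.h>

using executorch::backends::aoti::aoti_torch_get_dim;
using executorch::backends::aoti::aoti_torch_get_sizes;
using executorch::runtime::Error;

// Null inputs must be rejected with InvalidArgument rather than crashing.
TEST(CommonShimsSlimTest, NullArgumentsReturnInvalidArgument) {
  int64_t dim = 0;
  EXPECT_EQ(aoti_torch_get_dim(nullptr, &dim), Error::InvalidArgument);

  int64_t* sizes = nullptr;
  EXPECT_EQ(aoti_torch_get_sizes(nullptr, &sizes), Error::InvalidArgument);

  // A valid tensor with a null out-pointer is also rejected; a full test
  // would create one via the SlimTensor factory (omitted here).
}
```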