215 changes: 215 additions & 0 deletions backends/aoti/common_shims_slim.h
@@ -0,0 +1,215 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <executorch/backends/aoti/export.h>
#include <executorch/runtime/core/error.h>
#include <algorithm>
#include <cstdint>
#include <unordered_map>
#include <vector>

// Uses conditional compilation to separate the implementation between
// CUDA backend (SlimTensor) and other backends like MPS (ETensor).
// The caller determines which path is used by defining CUDA_AVAILABLE.
#ifdef CUDA_AVAILABLE
#include <executorch/backends/aoti/slim/core/SlimTensor.h>
#else
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#endif

namespace executorch {
namespace backends {
namespace aoti {

// Common using declarations for ExecuTorch types
using executorch::runtime::Error;

// ============================================================
// Tensor Type Definition - branched based on CUDA_AVAILABLE
// ============================================================
#ifdef CUDA_AVAILABLE
using Tensor = executorch::backends::aoti::slim::SlimTensor;
#else
using Tensor = executorch::runtime::etensor::Tensor;
#endif

// Common AOTI type aliases
using AOTIRuntimeError = Error;
using AOTITorchError = Error;

#ifndef CUDA_AVAILABLE
namespace internal {
// Global storage for tensor metadata (ETensor path only)
// SlimTensor stores sizes/strides directly in int64_t[] - no caching needed
inline std::unordered_map<Tensor*, std::vector<int64_t>>& tensor_to_sizes() {
static std::unordered_map<Tensor*, std::vector<int64_t>> instance;
return instance;
}
inline std::unordered_map<Tensor*, std::vector<int64_t>>& tensor_to_strides() {
static std::unordered_map<Tensor*, std::vector<int64_t>> instance;
return instance;
}
} // namespace internal
#endif

// ============================================================
// Basic Property Getters - Inline implementations
// ============================================================

inline AOTITorchError aoti_torch_get_data_ptr(
Tensor* tensor,
void** ret_data_ptr) {
if (tensor == nullptr) {
return Error::InvalidArgument;
}
if (ret_data_ptr == nullptr) {
return Error::InvalidArgument;
}

#ifdef CUDA_AVAILABLE
*ret_data_ptr = tensor->data_ptr();
#else
*ret_data_ptr = tensor->mutable_data_ptr();
#endif
return Error::Ok;
}

inline AOTITorchError aoti_torch_get_sizes(
Tensor* tensor,
int64_t** ret_sizes) {
if (tensor == nullptr) {
return Error::InvalidArgument;
}
if (ret_sizes == nullptr) {
return Error::InvalidArgument;
}

#ifdef CUDA_AVAILABLE
// SlimTensor stores sizes directly in int64_t[] - no caching needed
*ret_sizes = const_cast<int64_t*>(tensor->sizes().data());
#else
auto it = internal::tensor_to_sizes().find(tensor);
bool needs_update = false;

if (it == internal::tensor_to_sizes().end()) {
needs_update = true;
} else {
// Validate cached metadata matches current tensor state
auto tensor_sizes = tensor->sizes();
needs_update = !std::equal(
it->second.begin(),
it->second.end(),
tensor_sizes.begin(),
tensor_sizes.end());
}

if (needs_update) {
std::vector<int64_t> sizes(tensor->dim());
auto tensor_sizes = tensor->sizes();
for (int i = 0; i < tensor->dim(); i++) {
sizes[i] = tensor_sizes[i];
}
it = internal::tensor_to_sizes()
.insert_or_assign(tensor, std::move(sizes))
.first;
}

// For 0-D tensors the cached vector is empty and data() may be nullptr,
// so return a stable placeholder instead.
if (it->second.empty()) {
static int64_t empty_sizes_placeholder = 0;
*ret_sizes = &empty_sizes_placeholder;
} else {
*ret_sizes = it->second.data();
}
#endif
return Error::Ok;
}

inline AOTITorchError aoti_torch_get_strides(
Tensor* tensor,
int64_t** ret_strides) {
if (tensor == nullptr) {
return Error::InvalidArgument;
}
if (ret_strides == nullptr) {
return Error::InvalidArgument;
}

#ifdef CUDA_AVAILABLE
// SlimTensor stores strides directly in int64_t[] - no caching needed
*ret_strides = const_cast<int64_t*>(tensor->strides().data());
#else
auto it = internal::tensor_to_strides().find(tensor);
bool needs_update = false;

if (it == internal::tensor_to_strides().end()) {
needs_update = true;
} else {
// Validate cached metadata matches current tensor state
auto tensor_strides = tensor->strides();
needs_update = !std::equal(
it->second.begin(),
it->second.end(),
tensor_strides.begin(),
tensor_strides.end());
}

if (needs_update) {
std::vector<int64_t> strides(tensor->dim());
auto tensor_strides = tensor->strides();
for (int i = 0; i < tensor->dim(); i++) {
strides[i] = tensor_strides[i];
}
it = internal::tensor_to_strides()
.insert_or_assign(tensor, std::move(strides))
.first;
}

// For 0-D tensors the cached vector is empty and data() may be nullptr,
// so return a stable placeholder instead.
if (it->second.empty()) {
static int64_t empty_strides_placeholder = 0;
*ret_strides = &empty_strides_placeholder;
} else {
*ret_strides = it->second.data();
}
#endif
return Error::Ok;
}

inline AOTITorchError aoti_torch_get_dtype(Tensor* tensor, int32_t* ret_dtype) {
if (tensor == nullptr) {
return Error::InvalidArgument;
}
if (ret_dtype == nullptr) {
return Error::InvalidArgument;
}

#ifdef CUDA_AVAILABLE
*ret_dtype = static_cast<int32_t>(tensor->dtype());
#else
*ret_dtype = static_cast<int32_t>(tensor->scalar_type());
#endif
return Error::Ok;
}

inline AOTITorchError aoti_torch_get_dim(Tensor* tensor, int64_t* ret_dim) {
if (tensor == nullptr) {
return Error::InvalidArgument;
}
if (ret_dim == nullptr) {
return Error::InvalidArgument;
}

*ret_dim = static_cast<int64_t>(tensor->dim());
return Error::Ok;
}

} // namespace aoti
} // namespace backends
} // namespace executorch
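
A minimal usage sketch of the getter API above, assuming the caller already holds a `Tensor*` from the active backend; `read_tensor_metadata` is a hypothetical helper, not part of this change:

```cpp
#include <executorch/backends/aoti/common_shims_slim.h>

#include <cstdint>

namespace aoti = executorch::backends::aoti;

// Reads the basic properties of a tensor through the shim API.
// Every getter uses out-parameters and returns Error::Ok on success.
aoti::AOTITorchError read_tensor_metadata(aoti::Tensor* tensor) {
  int64_t dim = 0;
  auto err = aoti::aoti_torch_get_dim(tensor, &dim);
  if (err != aoti::Error::Ok) {
    return err;
  }

  int64_t* sizes = nullptr;
  int64_t* strides = nullptr;
  int32_t dtype = 0;
  void* data = nullptr;

  // Each call validates both the tensor and the out-pointer.
  err = aoti::aoti_torch_get_sizes(tensor, &sizes);
  if (err != aoti::Error::Ok) {
    return err;
  }
  err = aoti::aoti_torch_get_strides(tensor, &strides);
  if (err != aoti::Error::Ok) {
    return err;
  }
  err = aoti::aoti_torch_get_dtype(tensor, &dtype);
  if (err != aoti::Error::Ok) {
    return err;
  }
  err = aoti::aoti_torch_get_data_ptr(tensor, &data);
  if (err != aoti::Error::Ok) {
    return err;
  }

  // sizes/strides point at dim elements (or a placeholder for 0-D tensors).
  (void)dim;
  (void)dtype;
  (void)data;
  return aoti::Error::Ok;
}
```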
18 changes: 18 additions & 0 deletions backends/aoti/targets.bzl
@@ -86,3 +86,21 @@ def define_common_targets():
":delegate_handle",
],
)

# SlimTensor-based common shims (header-only library)
# The caller determines which tensor type is used by defining CUDA_AVAILABLE.
# - With CUDA_AVAILABLE=1: Uses SlimTensor
# - Without CUDA_AVAILABLE: Uses ETensor
runtime.cxx_library(
name = "common_shims_slim",
headers = [
"common_shims_slim.h",
"export.h",
],
visibility = ["@EXECUTORCH_CLIENTS"],
deps = [
"//executorch/runtime/core:core",
"//executorch/runtime/core/exec_aten:lib",
"//executorch/backends/aoti/slim/core:slimtensor",
],
)
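
Since the library itself does not set the flag, a consumer that wants the SlimTensor path must define `CUDA_AVAILABLE` in its own target. A hypothetical translation unit in such a target could verify the selection at compile time (this snippet is illustrative, not part of the change):

```cpp
// Hypothetical TU in a target compiled with -DCUDA_AVAILABLE=1 that
// depends on :common_shims_slim.
#include <executorch/backends/aoti/common_shims_slim.h>

#include <type_traits>

// With CUDA_AVAILABLE defined, Tensor aliases SlimTensor; without it,
// Tensor aliases the ETensor runtime tensor instead.
static_assert(
    std::is_same<
        executorch::backends::aoti::Tensor,
        executorch::backends::aoti::slim::SlimTensor>::value,
    "CUDA_AVAILABLE must select the SlimTensor path");
```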
25 changes: 25 additions & 0 deletions backends/aoti/tests/TARGETS
@@ -1,4 +1,5 @@
load("@fbcode_macros//build_defs:cpp_unittest.bzl", "cpp_unittest")
load("@fbcode_macros//build_defs/lib:re_test_utils.bzl", "re_test_utils")

oncall("executorch")

@@ -20,3 +21,27 @@ cpp_unittest(
"//executorch/extension/tensor:tensor",
],
)

cpp_unittest(
name = "test_common_shims_slim",
srcs = [
"test_common_shims_slim.cpp",
],
deps = [
"//executorch/backends/aoti:common_shims_slim",
"//executorch/backends/aoti/slim/core:slimtensor",
"//executorch/backends/aoti/slim/factory:empty",
"//executorch/runtime/core:core",
"//executorch/runtime/platform:platform",
],
external_deps = [
("cuda", None, "cuda-lazy"),
],
preprocessor_flags = [
"-DCUDA_AVAILABLE=1",
],
keep_gpu_sections = True,
remote_execution = re_test_utils.remote_execution(
platform = "gpu-remote-execution",
),
)
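
The test source (test_common_shims_slim.cpp) is not shown in this diff. A minimal sketch of the kind of null-argument checks it could contain, using only the shims defined above, might look like the following; constructing real SlimTensors via the factory dependency is omitted since that API is not part of this change:

```cpp
#include <executorch/backends/aoti/common_shims_slim.h>

#include <gtest/gtest.h>

using executorch::backends::aoti::aoti_torch_get_dim;
using executorch::backends::aoti::aoti_torch_get_sizes;
using executorch::runtime::Error;

// Null inputs must be rejected with InvalidArgument rather than crashing.
TEST(CommonShimsSlimTest, NullArgumentsReturnInvalidArgument) {
  int64_t dim = 0;
  EXPECT_EQ(aoti_torch_get_dim(nullptr, &dim), Error::InvalidArgument);

  int64_t* sizes = nullptr;
  EXPECT_EQ(aoti_torch_get_sizes(nullptr, &sizes), Error::InvalidArgument);

  // A valid tensor with a null out-pointer is also rejected; a full test
  // would create one via the SlimTensor factory (omitted here).
}
```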