@@ -44,6 +44,9 @@ use tracing::{debug, trace};
4444/// <https://docs.nvidia.com/cuda/archive/12.8.1/pdf/CUDA_C_Best_Practices_Guide.pdf>
4545const CONSTANT_MEMORY_SIZE_LIMIT_BYTES : u64 = 64 * 1024 ;
4646
47+ /// Cumulative constant memory usage, in bytes, that triggers a warning the first time it is exceeded: 80% of `CONSTANT_MEMORY_SIZE_LIMIT_BYTES`, rounded down by integer division.
48+ const CONSTANT_MEMORY_WARNING_THRESHOLD_BYTES : u64 = ( CONSTANT_MEMORY_SIZE_LIMIT_BYTES * 80 ) / 100 ;
49+
4750pub ( crate ) struct CodegenCx < ' ll , ' tcx > {
4851 pub tcx : TyCtxt < ' tcx > ,
4952
@@ -104,6 +107,9 @@ pub(crate) struct CodegenCx<'ll, 'tcx> {
104107 pub codegen_args : CodegenArgs ,
105108 // the value of the last call instruction. Needed for return type remapping.
106109 pub last_call_llfn : Cell < Option < & ' ll Value > > ,
110+
111+ /// Tracks cumulative constant memory usage in bytes for compile-time diagnostics
112+ constant_memory_usage : Cell < u64 > ,
107113}
108114
109115impl < ' ll , ' tcx > CodegenCx < ' ll , ' tcx > {
@@ -174,6 +180,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
174180 dbg_cx,
175181 codegen_args : CodegenArgs :: from_session ( tcx. sess ( ) ) ,
176182 last_call_llfn : Cell :: new ( None ) ,
183+ constant_memory_usage : Cell :: new ( 0 ) ,
177184 } ;
178185 cx. build_intrinsics_map ( ) ;
179186 cx
@@ -281,16 +288,47 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
281288 // static and many small ones, you might want the small ones to all be
282289 // in constant memory or just the big one depending on your workload.
283290 let layout = self . layout_of ( ty) ;
284- if layout. size . bytes ( ) > CONSTANT_MEMORY_SIZE_LIMIT_BYTES {
291+ let size_bytes = layout. size . bytes ( ) ;
292+ let current_usage = self . constant_memory_usage . get ( ) ;
293+ let new_usage = current_usage + size_bytes;
294+
295+ // Check if this single static is too large for constant memory
296+ if size_bytes > CONSTANT_MEMORY_SIZE_LIMIT_BYTES {
285297 self . tcx . sess . dcx ( ) . warn ( format ! (
286- "static `{instance}` exceeds the constant memory limit; placing in global memory (performance may be reduced)"
287- ) ) ;
288- // Place instance in global memory if it is too big for constant memory.
289- AddressSpace ( 1 )
290- } else {
291- // Place instance in constant memory if it fits.
292- AddressSpace ( 4 )
298+ "static `{instance}` is {size_bytes} bytes, exceeds the constant memory limit of {} bytes; placing in global memory (performance may be reduced)" ,
299+ CONSTANT_MEMORY_SIZE_LIMIT_BYTES
300+ ) ) ;
301+ return AddressSpace ( 1 ) ;
302+ }
303+
304+ // Check if adding this static would exceed the cumulative limit
305+ if new_usage > CONSTANT_MEMORY_SIZE_LIMIT_BYTES {
306+ self . tcx . sess . dcx ( ) . emit_err ( format ! (
307+ "cannot place static `{instance}` ({size_bytes} bytes) in constant memory: \
308+ cumulative constant memory usage would be {new_usage} bytes, exceeding the {} byte limit. \
309+ Current usage: {current_usage} bytes. \
310+ Consider: (1) using `#[cuda_std::address_space(global)]` on less frequently accessed statics, \
311+ (2) reducing static data sizes, or (3) disabling automatic constant memory placement",
312+ CONSTANT_MEMORY_SIZE_LIMIT_BYTES
313+ ) ) ;
314+ return AddressSpace ( 1 ) ;
293315 }
316+
317+ // If successfully placed in constant memory: update cumulative usage
318+ self . constant_memory_usage . set ( new_usage) ;
319+
320+ // Warn only once: when this static moves cumulative usage from at-or-below the warning threshold to above it
321+ if new_usage > CONSTANT_MEMORY_WARNING_THRESHOLD_BYTES &&
322+ current_usage <= CONSTANT_MEMORY_WARNING_THRESHOLD_BYTES {
323+ self . tcx . sess . dcx ( ) . warn ( format ! (
324+ "constant memory usage is approaching the limit: {new_usage} / {} bytes ({:.1}% used)" ,
325+ CONSTANT_MEMORY_SIZE_LIMIT_BYTES ,
326+ ( new_usage as f64 / CONSTANT_MEMORY_SIZE_LIMIT_BYTES as f64 ) * 100.0
327+ ) ) ;
328+ }
329+
330+ trace ! ( "Placing static `{instance}` ({size_bytes} bytes) in constant memory. Total usage: {new_usage} bytes" ) ;
331+ AddressSpace ( 4 )
294332 }
295333 } else {
296334 AddressSpace :: ZERO
0 commit comments