Skip to content

Commit f48bd71

Browse files
authored
chore: update feature/p-token branch with master changes (#774)
2 parents d6e5f36 + c8d9260 commit f48bd71

File tree

12 files changed

+10728
-10
lines changed

12 files changed

+10728
-10
lines changed

kmir/src/kmir/kdist/mir-semantics/rt/data.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1410,7 +1410,7 @@ Zero-sized types can be decoded trivially into their respective representation.
14101410

14111411
```k
14121412
// zero-sized struct
1413-
rule <k> #decodeConstant(constantKindZeroSized, _TY, typeInfoStructType(_, _, _))
1413+
rule <k> #decodeConstant(constantKindZeroSized, _TY, typeInfoStructType(_, _, _, _))
14141414
=> Aggregate(variantIdx(0), .List) ... </k>
14151415
// zero-sized tuple
14161416
rule <k> #decodeConstant(constantKindZeroSized, _TY, typeInfoTupleType(_))

kmir/src/kmir/kdist/mir-semantics/rt/decoding.md

Lines changed: 93 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ requires "value.md"
1616
requires "numbers.md"
1717
1818
module RT-DECODING
19+
imports INT
1920
imports BOOL
2021
imports MAP
2122
@@ -72,6 +73,66 @@ rule #decodeValue(BYTES, typeInfoArrayType(ELEMTY, noTyConst))
7273
=> #decodeSliceAllocation(BYTES, lookupTy(ELEMTY))
7374
```
7475

76+
### Struct decoding
77+
78+
Structs can have non-trivial layouts: field offsets may not match declaration order, and there may be padding.
79+
Decoding is only performed when explicit layout offsets are available; otherwise we leave the term as `UnableToDecode` via the default rule. This avoids incorrectly decoding data when field reordering or padding is present.
80+
81+
When using layout offsets we always return fields in declaration order within the `Aggregate` (variant 0), regardless of the physical order in memory.
82+
83+
```k
84+
// ---------------------------------------------------------------------------
85+
// Struct decoding rules (top level)
86+
// ---------------------------------------------------------------------------
87+
// Use the offsets when they are provided and the input length is sufficient.
88+
rule #decodeValue(BYTES, typeInfoStructType(_, _, TYS, LAYOUT))
89+
=> Aggregate(variantIdx(0), #decodeStructFieldsWithOffsets(BYTES, TYS, #structOffsets(LAYOUT)))
90+
requires #structOffsets(LAYOUT) =/=K .MachineSizes
91+
andBool 0 <=Int #neededBytesForOffsets(TYS, #structOffsets(LAYOUT))
92+
andBool lengthBytes(BYTES) >=Int #neededBytesForOffsets(TYS, #structOffsets(LAYOUT))
93+
[preserves-definedness]
94+
95+
// ---------------------------------------------------------------------------
96+
// Offsets and helpers (used by the rules above)
97+
// ---------------------------------------------------------------------------
98+
// MachineSize is in bits in the ABI; convert to bytes for slicing.
99+
syntax Int ::= #msBytes ( MachineSize ) [function, total]
100+
rule #msBytes(machineSize(mirInt(NBITS))) => NBITS /Int 8 [preserves-definedness]
101+
rule #msBytes(machineSize(NBITS)) => NBITS /Int 8 [owise, preserves-definedness]
102+
103+
// Extract field offsets from the struct layout when available (Arbitrary only).
104+
syntax MachineSizes ::= #structOffsets ( MaybeLayoutShape ) [function, total]
105+
rule #structOffsets(someLayoutShape(layoutShape(fieldsShapeArbitrary(mk(OFFSETS)), _, _, _, _))) => OFFSETS
106+
rule #structOffsets(noLayoutShape) => .MachineSizes
107+
rule #structOffsets(_) => .MachineSizes [owise]
108+
109+
// Minimum number of input bytes required to decode all fields by the chosen offsets.
110+
// Uses builtin maxInt to compute max(offset + size). The lists of types and
111+
// offsets must have the same length; if not, this function returns -1 to signal
112+
// an invalid layout for decoding.
113+
syntax Int ::= #neededBytesForOffsets ( Tys , MachineSizes ) [function, total]
114+
rule #neededBytesForOffsets(.Tys, .MachineSizes) => 0
115+
rule #neededBytesForOffsets(TY TYS, OFFSET OFFSETS)
116+
=> maxInt(#msBytes(OFFSET) +Int #elemSize(lookupTy(TY)), #neededBytesForOffsets(TYS, OFFSETS))
117+
// Any remaining pattern indicates a length mismatch between types and offsets.
118+
rule #neededBytesForOffsets(_, _) => -1 [owise]
119+
120+
// Decode each field at its byte offset and return values in declaration order.
121+
syntax List ::= #decodeStructFieldsWithOffsets ( Bytes , Tys , MachineSizes ) [function, total]
122+
rule #decodeStructFieldsWithOffsets(_, .Tys, _OFFSETS) => .List
123+
rule #decodeStructFieldsWithOffsets(_, _TYS, .MachineSizes) => .List [owise]
124+
rule #decodeStructFieldsWithOffsets(BYTES, TY TYS, OFFSET OFFSETS)
125+
=> ListItem(
126+
#decodeValue(
127+
substrBytes(BYTES, #msBytes(OFFSET), #msBytes(OFFSET) +Int #elemSize(lookupTy(TY))),
128+
lookupTy(TY)
129+
)
130+
)
131+
#decodeStructFieldsWithOffsets(BYTES, TYS, OFFSETS)
132+
requires lengthBytes(BYTES) >=Int (#msBytes(OFFSET) +Int #elemSize(lookupTy(TY)))
133+
[preserves-definedness]
134+
```
135+
75136
### Error marker (becomes thunk) for other (unimplemented) cases
76137

77138
All unimplemented cases will become thunks by way of this default rule:
@@ -84,20 +145,33 @@ All unimplemented cases will become thunks by way of this default rule:
84145

85146
```k
86147
// TODO: this function should go into the rt/types.md module
87-
syntax Int ::= #elemSize ( TypeInfo ) [function]
148+
syntax Int ::= #elemSize ( TypeInfo ) [function, total]
88149
```
89150

90151
Known element sizes for common types:
91152

92153
```k
154+
// ---- Primitive types ----
93155
rule #elemSize(typeInfoPrimitiveType(primTypeBool)) => 1
156+
// Rust `char` is a 32-bit Unicode scalar value
157+
rule #elemSize(typeInfoPrimitiveType(primTypeChar)) => 4
94158
rule #elemSize(TYPEINFO) => #bitWidth(#intTypeOf(TYPEINFO)) /Int 8
95159
requires #isIntType(TYPEINFO)
96-
160+
// Floating-point sizes
161+
rule #elemSize(typeInfoPrimitiveType(primTypeFloat(floatTyF16))) => 2
162+
rule #elemSize(typeInfoPrimitiveType(primTypeFloat(floatTyF32))) => 4
163+
rule #elemSize(typeInfoPrimitiveType(primTypeFloat(floatTyF64))) => 8
164+
rule #elemSize(typeInfoPrimitiveType(primTypeFloat(floatTyF128))) => 16
165+
// `str` is unsized; size only known with metadata (e.g., in fat pointers)
166+
rule #elemSize(typeInfoPrimitiveType(primTypeStr)) => 0
167+
168+
// ---- Arrays and slices ----
97169
rule #elemSize(typeInfoArrayType(ELEM_TY, someTyConst(tyConst(LEN, _))))
98170
=> #elemSize(lookupTy(ELEM_TY)) *Int readTyConstInt(LEN)
171+
// Slice `[T]` has dynamic size; plain value is unsized
172+
rule #elemSize(typeInfoArrayType(_ELEM_TY, noTyConst)) => 0
99173
100-
// thin and fat pointers
174+
// ---- thin and fat pointers ----
101175
rule #elemSize(typeInfoRefType(TY)) => #elemSize(typeInfoPrimitiveType(primTypeUint(uintTyUsize)))
102176
requires dynamicSize(1) ==K #metadataSize(TY)
103177
rule #elemSize(typeInfoRefType(_)) => 2 *Int #elemSize(typeInfoPrimitiveType(primTypeUint(uintTyUsize)))
@@ -107,10 +181,24 @@ Known element sizes for common types:
107181
rule #elemSize(typeInfoPtrType(_)) => 2 *Int #elemSize(typeInfoPrimitiveType(primTypeUint(uintTyUsize)))
108182
[owise]
109183
184+
// ---- Tuples ----
185+
// Without layout, approximate as sum of element sizes (ignores padding).
186+
rule #elemSize(typeInfoTupleType(.Tys)) => 0
187+
rule #elemSize(typeInfoTupleType(TY TYS))
188+
=> #elemSize(lookupTy(TY)) +Int #elemSize(typeInfoTupleType(TYS))
189+
190+
// ---- Structs and Enums with layout ----
191+
rule #elemSize(typeInfoStructType(_, _, _, someLayoutShape(layoutShape(_, _, _, _, SIZE)))) => #msBytes(SIZE)
192+
rule #elemSize(typeInfoEnumType(_, _, _, _, someLayoutShape(layoutShape(_, _, _, _, SIZE)))) => #msBytes(SIZE)
193+
194+
// ---- Function item types ----
195+
// Function items are zero-sized; function pointers are handled by PtrType
196+
rule #elemSize(typeInfoFunType(_)) => 0
197+
110198
rule #elemSize(typeInfoVoidType) => 0
111199
112-
// FIXME can only use size from layout here. Requires adding layout first.
113-
// Enum, Struct, Tuple,
200+
// Fallback to keep the function total for any remaining cases
201+
rule #elemSize(_) => 0 [owise]
114202
115203
rule 0 <=Int #elemSize(_) => true [simplification, preserves-definedness]
116204
```

kmir/src/kmir/kdist/mir-semantics/ty.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,11 @@ syntax ExistentialPredicateBinders ::= List {ExistentialPredicateBinder, ""}
265265
, discriminants: Discriminants
266266
, fields: Tyss
267267
, layout: MaybeLayoutShape) [symbol(TypeInfo::EnumType) , group(mir-enum---name--adt-def--discriminants--fields--layout)]
268-
| typeInfoStructType(MIRString, AdtDef, Tys) [symbol(TypeInfo::StructType) , group(mir-enum---name--adt-def--fields)]
268+
| typeInfoStructType(
269+
name: MIRString
270+
, adtDef: AdtDef
271+
, fields: Tys
272+
, layout: MaybeLayoutShape) [symbol(TypeInfo::StructType) , group(mir-enum---name--adt-def--fields--layout)]
269273
| typeInfoUnionType(MIRString, AdtDef) [symbol(TypeInfo::UnionType) , group(mir-enum---name--adt-def)]
270274
| typeInfoArrayType(Ty, MaybeTyConst) [symbol(TypeInfo::ArrayType) , group(mir-enum---elem-type--size)]
271275
| typeInfoPtrType(Ty) [symbol(TypeInfo::PtrType) , group(mir-enum---pointee-type)]
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#![allow(dead_code)]
2+
3+
#[derive(Copy, Clone)]
4+
pub struct Pubkey(pub [u8; 32]);
5+
6+
const KEY: Pubkey = Pubkey([
7+
0, 1, 2, 3, 4, 5, 6, 7,
8+
8, 9, 10, 11, 12, 13, 14, 15,
9+
16, 17, 18, 19, 20, 21, 22, 23,
10+
24, 25, 26, 27, 28, 29, 30, 31,
11+
]);
12+
13+
#[inline(never)]
14+
fn use_inner(arr: &[u8; 32]) -> usize {
15+
// Touch the array so MIR keeps the projection
16+
if arr[0] == 0 { 32 } else { 0 }
17+
}
18+
19+
fn main() {
20+
let pk = KEY; // forces constant decoding of StructType(newtype)
21+
let len = use_inner(&pk.0); // field projection .0; needs StructType decode to Aggregate
22+
core::hint::black_box(len);
23+
}
24+

0 commit comments

Comments
 (0)