From 20e051ab67c1c12b8ebabf6b531f110638a46cf2 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 5 Sep 2025 22:58:48 -0700 Subject: [PATCH 01/13] Propagate to exact types in struct-utils.h Key collected struct information on both heap type and exactness, allowing queries for exact types to return more precise results than queries on the corresponding inexact types. Use this to fix a bug in CFP where it failed to take into account exactness and would unnecessarily and incorrectly emit a select between two values of different types where a single exact type was expected. Also update GTO to propagate to exact types even though it does not take advantage of them. This is necessary because the FieldScanner now collects information for exact and inexact types separately and they need to be combined. --- src/ir/struct-utils.h | 115 +++++++++++++++--------- src/passes/ConstantFieldPropagation.cpp | 19 ++-- src/passes/GlobalTypeOptimization.cpp | 10 +-- test/lit/passes/cfp-reftest-desc.wast | 79 ++++++++++++++++ test/lit/passes/cfp-reftest.wast | 54 +++++++++++ 5 files changed, 223 insertions(+), 54 deletions(-) diff --git a/src/ir/struct-utils.h b/src/ir/struct-utils.h index c2c92f32a42..35c4e7ef613 100644 --- a/src/ir/struct-utils.h +++ b/src/ir/struct-utils.h @@ -19,6 +19,7 @@ #include "ir/properties.h" #include "ir/subtypes.h" +#include "wasm-type.h" #include "wasm.h" namespace wasm { @@ -88,19 +89,24 @@ template struct StructValues : public std::vector { // Also provides a combineInto() helper that combines one map into another. This // depends on the underlying T defining a combine() method. template -struct StructValuesMap : public std::unordered_map> { +struct StructValuesMap + : public std::unordered_map, StructValues> { // When we access an item, if it does not already exist, create it with a // vector of the right length for that type. 
- StructValues& operator[](HeapType type) { - assert(type.isStruct()); + StructValues& operator[](std::pair type) { + assert(type.first.isStruct()); auto inserted = this->insert({type, {}}); auto& values = inserted.first->second; if (inserted.second) { - values.resize(type.getStruct().fields.size()); + values.resize(type.first.getStruct().fields.size()); } return values; } + StructValues& operator[](HeapType type) { + return (*this)[{type, Inexact}]; + } + void combineInto(StructValuesMap& combinedInfos) const { for (auto& [type, info] : *this) { for (Index i = 0; i < info.size(); i++) { @@ -113,7 +119,8 @@ struct StructValuesMap : public std::unordered_map> { void dump(std::ostream& o) { o << "dump " << this << '\n'; for (auto& [type, vec] : (*this)) { - o << "dump " << type << " " << &vec << ' '; + o << "dump " << type.first << (type.second == Exact ? " exact " : " ") + << &vec << ' '; for (auto x : vec) { x.dump(o); o << " "; @@ -203,7 +210,8 @@ struct StructScanner // Note writes to all the fields of the struct. auto heapType = type.getHeapType(); auto& fields = heapType.getStruct().fields; - auto& infos = functionNewInfos[this->getFunction()][heapType]; + auto ht = std::make_pair(heapType, Exact); + auto& infos = functionNewInfos[this->getFunction()][ht]; for (Index i = 0; i < fields.size(); i++) { if (curr->isWithDefault()) { self().noteDefault(fields[i].type, heapType, i, infos[i]); @@ -224,11 +232,12 @@ struct StructScanner } // Note a write to this field of the struct. 
- noteExpressionOrCopy(curr->value, - type.getHeapType(), - curr->index, - functionSetGetInfos[this->getFunction()] - [type.getHeapType()][curr->index]); + auto ht = std::make_pair(type.getHeapType(), type.getExactness()); + noteExpressionOrCopy( + curr->value, + type.getHeapType(), + curr->index, + functionSetGetInfos[this->getFunction()][ht][curr->index]); } void visitStructGet(StructGet* curr) { @@ -237,11 +246,11 @@ struct StructScanner return; } - auto heapType = type.getHeapType(); + auto ht = std::make_pair(type.getHeapType(), type.getExactness()); auto index = curr->index; - self().noteRead(heapType, + self().noteRead(type.getHeapType(), index, - functionSetGetInfos[this->getFunction()][heapType][index]); + functionSetGetInfos[this->getFunction()][ht][index]); } void visitStructRMW(StructRMW* curr) { @@ -251,9 +260,9 @@ struct StructScanner } auto heapType = type.getHeapType(); + auto ht = std::make_pair(heapType, type.getExactness()); auto index = curr->index; - auto& info = - functionSetGetInfos[this->getFunction()][type.getHeapType()][index]; + auto& info = functionSetGetInfos[this->getFunction()][ht][index]; if (curr->op == RMWXchg) { // An xchg is really like a read and write combined. @@ -274,9 +283,9 @@ struct StructScanner } auto heapType = type.getHeapType(); + auto ht = std::make_pair(heapType, type.getExactness()); auto index = curr->index; - auto& info = - functionSetGetInfos[this->getFunction()][type.getHeapType()][curr->index]; + auto& info = functionSetGetInfos[this->getFunction()][ht][index]; // A cmpxchg is like a read and conditional write. self().noteRead(heapType, index, info); @@ -310,11 +319,12 @@ struct StructScanner return; } auto heapType = type.getHeapType(); + auto ht = std::make_pair(heapType, type.getExactness()); if (heapType.isStruct()) { // Any subtype of the reference here may be read from. 
self().noteRead(heapType, DescriptorIndex, - functionSetGetInfos[this->getFunction()][heapType].desc); + functionSetGetInfos[this->getFunction()][ht].desc); return; } } @@ -372,13 +382,19 @@ template class TypeHierarchyPropagator { // Propagate given a StructValuesMap, which means we need to take into // account fields. void propagateToSuperTypes(StructValuesMap& infos) { - propagate(infos, false, true); + propagate(infos, false, true, true); } void propagateToSubTypes(StructValuesMap& infos) { - propagate(infos, true, false); + propagate(infos, true, false, false); + } + void propagateToSubTypesWithExact(StructValuesMap& infos) { + propagate(infos, true, false, true); } void propagateToSuperAndSubTypes(StructValuesMap& infos) { - propagate(infos, true, true); + propagate(infos, true, true, false); + } + void propagateToSuperAndSubTypesWithExact(StructValuesMap& infos) { + propagate(infos, true, true, true); } // Propagate on a simpler map of structs and infos (that is, not using @@ -398,46 +414,63 @@ template class TypeHierarchyPropagator { private: void propagate(StructValuesMap& combinedInfos, bool toSubTypes, - bool toSuperTypes) { - UniqueDeferredQueue work; - for (auto& [type, _] : combinedInfos) { - work.push(type); + bool toSuperTypes, + bool includeExact) { + UniqueDeferredQueue> work; + for (auto& [ht, _] : combinedInfos) { + work.push(ht); } while (!work.empty()) { - auto type = work.pop(); - auto& infos = combinedInfos[type]; + auto [type, exactness] = work.pop(); + auto& infos = combinedInfos[{type, exactness}]; if (toSuperTypes) { - // Propagate shared fields to the supertype. - if (auto superType = type.getDeclaredSuperType()) { - auto& superInfos = combinedInfos[*superType]; - auto& superFields = superType->getStruct().fields; - for (Index i = 0; i < superFields.size(); i++) { + // Propagate shared fields to the supertype, which may be the inexact + // version of the same type. 
+ std::optional> super; + if (exactness == Exact) { + super = {type, Inexact}; + } else if (auto superType = type.getDeclaredSuperType()) { + super = {*superType, Inexact}; + } + if (super) { + auto& superInfos = combinedInfos[*super]; + const auto& superFields = &super->first.getStruct().fields; + for (Index i = 0; i < superFields->size(); i++) { if (superInfos[i].combine(infos[i])) { - work.push(*superType); + work.push(*super); } } // Propagate the descriptor to the super, if the super has one. - if (superType->getDescriptorType() && + if (super->first.getDescriptorType() && superInfos.desc.combine(infos.desc)) { - work.push(*superType); + work.push(*super); } } } if (toSubTypes) { - // Propagate shared fields to the subtypes. + // Propagate shared fields to the subtypes, which may just be the exact + // version of the same type. auto numFields = type.getStruct().fields.size(); - for (auto subType : subTypes.getImmediateSubTypes(type)) { - auto& subInfos = combinedInfos[subType]; + std::vector> subs; + if (includeExact && exactness == Inexact) { + subs = {{type, Exact}}; + } else { + for (auto subType : subTypes.getImmediateSubTypes(type)) { + subs.emplace_back(subType, Inexact); + } + } + for (auto sub : subs) { + auto& subInfos = combinedInfos[sub]; for (Index i = 0; i < numFields; i++) { if (subInfos[i].combine(infos[i])) { - work.push(subType); + work.push(sub); } } // Propagate the descriptor. 
if (subInfos.desc.combine(infos.desc)) { - work.push(subType); + work.push(sub); } } } diff --git a/src/passes/ConstantFieldPropagation.cpp b/src/passes/ConstantFieldPropagation.cpp index 9838bf636ff..8d0244f9450 100644 --- a/src/passes/ConstantFieldPropagation.cpp +++ b/src/passes/ConstantFieldPropagation.cpp @@ -53,7 +53,6 @@ #include "ir/bits.h" #include "ir/gc-type-utils.h" -#include "ir/module-utils.h" #include "ir/possible-constant.h" #include "ir/struct-utils.h" #include "ir/utils.h" @@ -114,8 +113,10 @@ struct FunctionOptimizer : public WalkerPass> { return heapType; } - PossibleConstantValues getInfo(HeapType type, Index index) { - if (auto it = propagatedInfos.find(type); it != propagatedInfos.end()) { + PossibleConstantValues + getInfo(HeapType type, Exactness exactness, Index index) { + if (auto it = propagatedInfos.find({type, exactness}); + it != propagatedInfos.end()) { // There is information on this type, fetch it. return it->second[index]; } @@ -177,7 +178,8 @@ struct FunctionOptimizer : public WalkerPass> { // Find the info for this field, and see if we can optimize. First, see if // there is any information for this heap type at all. If there isn't, it is // as if nothing was ever noted for that field. - PossibleConstantValues info = getInfo(heapType, index); + PossibleConstantValues info = + getInfo(heapType, ref->type.getExactness(), index); if (!info.hasNoted()) { // This field is never written at all. That means that we do not even // construct any data of this type, and so it is a logic error to reach @@ -282,7 +284,7 @@ struct FunctionOptimizer : public WalkerPass> { return; } - auto iter = rawNewInfos.find(type); + auto iter = rawNewInfos.find({type, Exact}); if (iter == rawNewInfos.end()) { // This type has no struct.news, so we can ignore it: it is abstract. return; @@ -446,7 +448,8 @@ struct PCVScanner void noteCopy(HeapType type, Index index, PossibleConstantValues& info) { // Note copies, as they must be considered later. 
See the comment on the // propagation of values below. - functionCopyInfos[getFunction()][type][index] = true; + // TODO: Take into account exactness here. + functionCopyInfos[getFunction()][{type, Inexact}][index] = true; } void noteRead(HeapType type, Index index, PossibleConstantValues& info) { @@ -558,7 +561,7 @@ struct ConstantFieldPropagation : public Pass { // a copy of A means it could be a copy of B or C). StructUtils::TypeHierarchyPropagator boolPropagator(subTypes); - boolPropagator.propagateToSubTypes(combinedCopyInfos); + boolPropagator.propagateToSubTypesWithExact(combinedCopyInfos); for (auto& [type, copied] : combinedCopyInfos) { for (Index i = 0; i < copied.size(); i++) { if (copied[i]) { @@ -570,7 +573,7 @@ struct ConstantFieldPropagation : public Pass { StructUtils::TypeHierarchyPropagator propagator( subTypes); propagator.propagateToSuperTypes(combinedNewInfos); - propagator.propagateToSuperAndSubTypes(combinedSetInfos); + propagator.propagateToSuperAndSubTypesWithExact(combinedSetInfos); // Combine both sources of information to the final information that gets // care about. 
diff --git a/src/passes/GlobalTypeOptimization.cpp b/src/passes/GlobalTypeOptimization.cpp index 3c6e411799c..d3d0a3a8883 100644 --- a/src/passes/GlobalTypeOptimization.cpp +++ b/src/passes/GlobalTypeOptimization.cpp @@ -30,7 +30,6 @@ #include "ir/struct-utils.h" #include "ir/subtypes.h" #include "ir/type-updating.h" -#include "ir/utils.h" #include "pass.h" #include "support/permutations.h" #include "wasm-builder.h" @@ -205,9 +204,9 @@ struct GlobalTypeOptimization : public Pass { SubTypes subTypes(*module); StructUtils::TypeHierarchyPropagator propagator(subTypes); auto dataFromSubsAndSupersMap = combinedSetGetInfos; - propagator.propagateToSuperAndSubTypes(dataFromSubsAndSupersMap); + propagator.propagateToSuperAndSubTypesWithExact(dataFromSubsAndSupersMap); auto dataFromSupersMap = std::move(combinedSetGetInfos); - propagator.propagateToSubTypes(dataFromSupersMap); + propagator.propagateToSubTypesWithExact(dataFromSupersMap); // Find the public types, which we must not modify. auto publicTypes = ModuleUtils::getPublicHeapTypes(*module); @@ -224,8 +223,9 @@ struct GlobalTypeOptimization : public Pass { continue; } auto& fields = type.getStruct().fields; - auto& dataFromSubsAndSupers = dataFromSubsAndSupersMap[type]; - auto& dataFromSupers = dataFromSupersMap[type]; + auto ht = std::make_pair(type, Exact); + auto& dataFromSubsAndSupers = dataFromSubsAndSupersMap[ht]; + auto& dataFromSupers = dataFromSupersMap[ht]; // Process immutability. 
for (Index i = 0; i < fields.size(); i++) { diff --git a/test/lit/passes/cfp-reftest-desc.wast b/test/lit/passes/cfp-reftest-desc.wast index 11c649260a4..7064276f2b4 100644 --- a/test/lit/passes/cfp-reftest-desc.wast +++ b/test/lit/passes/cfp-reftest-desc.wast @@ -106,3 +106,82 @@ ) ) +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $super (sub (descriptor $super.desc (struct)))) + (type $super (sub (descriptor $super.desc (struct)))) + ;; CHECK: (type $super.desc (sub (describes $super (struct)))) + (type $super.desc (sub (describes $super (struct)))) + + ;; CHECK: (type $func (func (param i32) (result i32))) + (type $func (func (param i32) (result i32))) + + ;; CHECK: (type $sub (sub $super (descriptor $sub.desc (struct)))) + (type $sub (sub $super (descriptor $sub.desc (struct)))) + ;; CHECK: (type $sub.desc (sub $super.desc (describes $sub (struct)))) + (type $sub.desc (sub $super.desc (describes $sub (struct)))) + ) + + ;; CHECK: (type $5 (func (result (ref (exact $super.desc))))) + + ;; CHECK: (global $A (ref (exact $super.desc)) (struct.new_default $super.desc)) + (global $A (ref (exact $super.desc)) (struct.new $super.desc)) + + ;; CHECK: (global $B (ref (exact $sub.desc)) (struct.new_default $sub.desc)) + (global $B (ref (exact $sub.desc)) (struct.new $sub.desc)) + + ;; CHECK: (func $test (type $5) (result (ref (exact $super.desc))) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new_default $super + ;; CHECK-NEXT: (global.get $A) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new_default $sub + ;; CHECK-NEXT: (global.get $B) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (block (result (ref (exact $super.desc))) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (block (result nullref) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (global.get $A) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (result (ref (exact 
$super.desc))) + (drop + (struct.new_default $super + (global.get $A) + ) + ) + (drop + (struct.new_default $sub + (global.get $B) + ) + ) + ;; We read from an exact $super here, so the type of the ref.get_desc is + ;; exact as well. If we ignore that in the optimization, we might think that + ;; the two struct.news before us are two possible values, one from $super and + ;; one from $sub, and if we emitted a ref.test between those values, we'd get + ;; a non-exact value that does not validate. + ;; + ;; Instead, we should look only at $super itself, and optimize to $A. + (ref.get_desc $super + (block (result (ref null (exact $super))) + (ref.null $super) + ) + ) + ) + + ;; CHECK: (func $func (type $func) (param $0 i32) (result i32) + ;; CHECK-NEXT: (i32.const 42) + ;; CHECK-NEXT: ) + (func $func (type $func) (param $0 i32) (result i32) + (i32.const 42) + ) +) diff --git a/test/lit/passes/cfp-reftest.wast b/test/lit/passes/cfp-reftest.wast index 3f903e8b451..f46b211cd5e 100644 --- a/test/lit/passes/cfp-reftest.wast +++ b/test/lit/passes/cfp-reftest.wast @@ -1456,3 +1456,57 @@ ) ) ) + +(module + ;; CHECK: (type $struct (sub (struct (field i32)))) + (type $struct (sub (struct i32))) + ;; CHECK: (type $1 (func)) + + ;; CHECK: (type $substruct (sub $struct (struct (field i32) (field f64)))) + (type $substruct (sub $struct (struct i32 f64))) + + ;; CHECK: (type $3 (func (param (ref null (exact $struct))) (result i32))) + + ;; CHECK: (func $create (type $1) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new $struct + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new $substruct + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: (f64.const 3.14159) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $create + ;; Used below. 
+ (drop + (struct.new $struct + (i32.const 10) + ) + ) + (drop + (struct.new $substruct + (i32.const 20) + (f64.const 3.14159) + ) + ) + ) + ;; CHECK: (func $get (type $3) (param $struct (ref null (exact $struct))) (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + (func $get (param $struct (ref null (exact $struct))) (result i32) + ;; The type here is exact, so we do not even need to do a select: only the + ;; super's value is possible, 10. + (struct.get $struct 0 + (local.get $struct) + ) + ) +) From be904ae09b912a31805495b44801bc2418a3fe87 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 8 Sep 2025 14:28:35 -0700 Subject: [PATCH 02/13] [NFC] Add tests for missing exact CFP optimizations CFP takes advantage of exact type information, but it currently does so only for immutable fields. It is also unnecessarily conservative about how it propagates type information so that sets to a type inhibit optimizations of its sibling types, even though those sets cannot possibly affect the siblings. Add tests for these cases to demonstrate the benefit of follow-on PRs that will fix these issues. --- test/lit/passes/cfp.wast | 365 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 365 insertions(+) diff --git a/test/lit/passes/cfp.wast b/test/lit/passes/cfp.wast index 324178d331d..16e80c5c8f6 100644 --- a/test/lit/passes/cfp.wast +++ b/test/lit/passes/cfp.wast @@ -2499,6 +2499,371 @@ ) ) +(module + ;; Same as above but now the fields are mutable. 
+ (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (struct (field (mut i32))))) + (type $A (sub (struct (field (mut i32))))) + ;; CHECK: (type $B (sub $A (struct (field (mut i32))))) + (type $B (sub $A (struct (field (mut i32))))) + ) + + ;; CHECK: (type $2 (func (param i32))) + + ;; CHECK: (func $test (type $2) (param $0 i32) + ;; CHECK-NEXT: (local $A (ref $A)) + ;; CHECK-NEXT: (local $B (ref $B)) + ;; CHECK-NEXT: (local $A-exact (ref (exact $A))) + ;; CHECK-NEXT: (local $B-exact (ref (exact $B))) + ;; CHECK-NEXT: (local.set $A + ;; CHECK-NEXT: (local.tee $A-exact + ;; CHECK-NEXT: (struct.new $A + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $B + ;; CHECK-NEXT: (local.tee $B-exact + ;; CHECK-NEXT: (struct.new $B + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $A 0 + ;; CHECK-NEXT: (local.get $A) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $B) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $A 0 + ;; CHECK-NEXT: (local.get $A-exact) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $B-exact) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $0 i32) + (local $A (ref $A)) + (local $B (ref $B)) + (local $A-exact (ref (exact $A))) + (local $B-exact (ref (exact $B))) + (local.set $A + (local.tee $A-exact + (struct.new $A + (i32.const 10) + ) + ) + ) + (local.set $B + (local.tee $B-exact + (struct.new $B + (i32.const 20) + ) + ) + ) 
+ ;; We can optimize an inexact $B, but not $A. + (drop + (struct.get $A 0 + (local.get $A) + ) + ) + (drop + (struct.get $B 0 + (local.get $B) + ) + ) + ;; We should be able to optimize both exact references TODO. + (drop + (struct.get $A 0 + (local.get $A-exact) + ) + ) + (drop + (struct.get $B 0 + (local.get $B-exact) + ) + ) + ) +) + +(module + ;; Same as above but now we add no-op sets. + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (struct (field (mut i32))))) + (type $A (sub (struct (field (mut i32))))) + ;; CHECK: (type $B (sub $A (struct (field (mut i32))))) + (type $B (sub $A (struct (field (mut i32))))) + ) + + ;; CHECK: (type $2 (func (param i32))) + + ;; CHECK: (func $test (type $2) (param $0 i32) + ;; CHECK-NEXT: (local $A (ref $A)) + ;; CHECK-NEXT: (local $B (ref $B)) + ;; CHECK-NEXT: (local $A-exact (ref (exact $A))) + ;; CHECK-NEXT: (local $B-exact (ref (exact $B))) + ;; CHECK-NEXT: (local.set $A + ;; CHECK-NEXT: (local.tee $A-exact + ;; CHECK-NEXT: (struct.new $A + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $B + ;; CHECK-NEXT: (local.tee $B-exact + ;; CHECK-NEXT: (struct.new $B + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $A 0 + ;; CHECK-NEXT: (local.get $A-exact) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $B 0 + ;; CHECK-NEXT: (local.get $B-exact) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $A 0 + ;; CHECK-NEXT: (local.get $A) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $B 0 + ;; CHECK-NEXT: (local.get $B) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $A 0 + ;; CHECK-NEXT: (local.get $A-exact) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $B 0 + ;; CHECK-NEXT: (local.get 
$B-exact) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $0 i32) + (local $A (ref $A)) + (local $B (ref $B)) + (local $A-exact (ref (exact $A))) + (local $B-exact (ref (exact $B))) + (local.set $A + (local.tee $A-exact + (struct.new $A + (i32.const 10) + ) + ) + ) + (local.set $B + (local.tee $B-exact + (struct.new $B + (i32.const 20) + ) + ) + ) + ;; No-op exact sets should not inhibit optimization. + (struct.set $A 0 + (local.get $A-exact) + (i32.const 10) + ) + (struct.set $B 0 + (local.get $B-exact) + (i32.const 20) + ) + ;; We should be able to optimize an inexact $B, but not $A TODO. + (drop + (struct.get $A 0 + (local.get $A) + ) + ) + (drop + (struct.get $B 0 + (local.get $B) + ) + ) + ;; We should be able to optimize both exact references TODO. + (drop + (struct.get $A 0 + (local.get $A-exact) + ) + ) + (drop + (struct.get $B 0 + (local.get $B-exact) + ) + ) + ) +) + +(module + ;; Sets to a subtype should not affect exact gets of a supertype or sibling. 
+ (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (struct (field (mut i32))))) + (type $A (sub (struct (field (mut i32))))) + ;; CHECK: (type $B (sub $A (struct (field (mut i32))))) + (type $B (sub $A (struct (field (mut i32))))) + ;; CHECK: (type $C (sub $A (struct (field (mut i32))))) + (type $C (sub $A (struct (field (mut i32))))) + ) + + ;; CHECK: (type $3 (func)) + + ;; CHECK: (type $4 (func (param (ref $B)))) + + ;; CHECK: (type $5 (func (param (ref $A) (ref $B) (ref $C)))) + + ;; CHECK: (type $6 (func (param (ref (exact $A)) (ref (exact $B)) (ref (exact $C))))) + + ;; CHECK: (func $news (type $3) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new $A + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new $B + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new $C + ;; CHECK-NEXT: (i32.const 30) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $news + (drop + (struct.new $A + (i32.const 10) + ) + ) + (drop + (struct.new $B + (i32.const 20) + ) + ) + (drop + (struct.new $C + (i32.const 30) + ) + ) + ) + + ;; CHECK: (func $set-B (type $4) (param $B (ref $B)) + ;; CHECK-NEXT: (struct.set $B 0 + ;; CHECK-NEXT: (local.get $B) + ;; CHECK-NEXT: (i32.const 666) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $set-B (param $B (ref $B)) + ;; Inhibits optimizations on B and inexact A only. 
+ (struct.set $B 0 + (local.get $B) + (i32.const 666) + ) + ) + + ;; CHECK: (func $inexact-gets (type $5) (param $A (ref $A)) (param $B (ref $B)) (param $C (ref $C)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $A 0 + ;; CHECK-NEXT: (local.get $A) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $B 0 + ;; CHECK-NEXT: (local.get $B) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $C 0 + ;; CHECK-NEXT: (local.get $C) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $inexact-gets (param $A (ref $A)) (param $B (ref $B)) (param $C (ref $C)) + (drop + (struct.get $A 0 + (local.get $A) + ) + ) + (drop + (struct.get $B 0 + (local.get $B) + ) + ) + ;; This should be optimizable TODO. + (drop + (struct.get $C 0 + (local.get $C) + ) + ) + ) + + ;; CHECK: (func $exact-gets (type $6) (param $A-exact (ref (exact $A))) (param $B-exact (ref (exact $B))) (param $C-exact (ref (exact $C))) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $A 0 + ;; CHECK-NEXT: (local.get $A-exact) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $B 0 + ;; CHECK-NEXT: (local.get $B-exact) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.get $C 0 + ;; CHECK-NEXT: (local.get $C-exact) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $exact-gets (param $A-exact (ref (exact $A))) + (param $B-exact (ref (exact $B))) + (param $C-exact (ref (exact $C))) + (drop + ;; This should be optimizable TODO. + (struct.get $A 0 + (local.get $A-exact) + ) + ) + ;; Not optimizable. + (drop + (struct.get $B 0 + (local.get $B-exact) + ) + ) + ;; This should be optimizable TODO. + (drop + (struct.get $C 0 + (local.get $C-exact) + ) + ) + ) +) + ;; A type with two subtypes. A copy on the parent can affect either child. 
(module (rec From 65eff0c1cefc0e63b2cc1d6ec5d99b8f23cc27d0 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 8 Sep 2025 14:47:42 -0700 Subject: [PATCH 03/13] update test --- test/lit/passes/cfp.wast | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/test/lit/passes/cfp.wast b/test/lit/passes/cfp.wast index 16e80c5c8f6..4ccdda92a2b 100644 --- a/test/lit/passes/cfp.wast +++ b/test/lit/passes/cfp.wast @@ -2546,8 +2546,13 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (struct.get $A 0 - ;; CHECK-NEXT: (local.get $A-exact) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $A-exact) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 10) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop @@ -2591,7 +2596,7 @@ (local.get $B) ) ) - ;; We should be able to optimize both exact references TODO. + ;; We should be able to optimize both exact references. (drop (struct.get $A 0 (local.get $A-exact) From a310e7fd8035de25e2b82799335b4914f2df4da4 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 8 Sep 2025 21:30:31 -0700 Subject: [PATCH 04/13] fix --- src/ir/struct-utils.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/ir/struct-utils.h b/src/ir/struct-utils.h index 35c4e7ef613..7f3e7f91a6f 100644 --- a/src/ir/struct-utils.h +++ b/src/ir/struct-utils.h @@ -449,17 +449,16 @@ template class TypeHierarchyPropagator { } } - if (toSubTypes) { + if (toSubTypes && exactness == Inexact) { // Propagate shared fields to the subtypes, which may just be the exact // version of the same type. 
auto numFields = type.getStruct().fields.size(); std::vector> subs; - if (includeExact && exactness == Inexact) { - subs = {{type, Exact}}; - } else { - for (auto subType : subTypes.getImmediateSubTypes(type)) { - subs.emplace_back(subType, Inexact); - } + if (includeExact) { + subs.emplace_back(type, Exact); + } + for (auto subType : subTypes.getImmediateSubTypes(type)) { + subs.emplace_back(subType, Inexact); } for (auto sub : subs) { auto& subInfos = combinedInfos[sub]; From c567e374e82c616273126701ab55f5659260a78b Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 8 Sep 2025 21:34:20 -0700 Subject: [PATCH 05/13] comment --- src/ir/struct-utils.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ir/struct-utils.h b/src/ir/struct-utils.h index 7f3e7f91a6f..77ed41fdbea 100644 --- a/src/ir/struct-utils.h +++ b/src/ir/struct-utils.h @@ -84,7 +84,9 @@ template struct StructValues : public std::vector { T desc; }; -// Maps heap types to a StructValues for that heap type. +// Maps heap types to a StructValues for that heap type. Includes exactness in +// the key to allow differentiating between values for exact and inexact +// references to each type. // // Also provides a combineInto() helper that combines one map into another. This // depends on the underlying T defining a combine() method. From fd3ec9caf7d5239d453db836a0634aece01ae7f0 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Mon, 8 Sep 2025 21:23:49 -0700 Subject: [PATCH 06/13] Improve CFP by removing rawNewInfos For the purposes of CFP, there's nothing fundamentally different between a set on an exact reference and a value set by allocation. CFP's use of the allocation values without considering all exact sets was therefore an unnecessary complication that restricted CFP's optimizing power. Expand optimizeUsingRefTest to optimize mutable fields, including those that have been set, by using the full available information instead of just the allocation values. 
Handle copies more judiciously by propagating once to find copied values and then propagating again while taking those copied values into account. This scheme can be extended in the future to precisely handle copies between different fields and types as well. Also optimize siblings better by propagating first down and then up rather than propagating in both directions at once. This avoids unnecessarily propagating set values to siblings. --- src/passes/ConstantFieldPropagation.cpp | 123 ++++++++--------- test/lit/passes/cfp-reftest.wast | 169 +++++++++++++++++++++++- test/lit/passes/cfp.wast | 142 +++++++++++++------- 3 files changed, 321 insertions(+), 113 deletions(-) diff --git a/src/passes/ConstantFieldPropagation.cpp b/src/passes/ConstantFieldPropagation.cpp index e1c120685bf..9043a31475a 100644 --- a/src/passes/ConstantFieldPropagation.cpp +++ b/src/passes/ConstantFieldPropagation.cpp @@ -91,15 +91,15 @@ struct FunctionOptimizer : public WalkerPass> { // subtyping and new infos (information about struct.news). std::unique_ptr create() override { return std::make_unique( - propagatedInfos, subTypes, rawNewInfos, refTest); + propagatedInfos, refTestInfos, subTypes, refTest); } FunctionOptimizer(const PCVStructValuesMap& propagatedInfos, + const PCVStructValuesMap& refTestInfos, const SubTypes& subTypes, - const PCVStructValuesMap& rawNewInfos, bool refTest) - : propagatedInfos(propagatedInfos), subTypes(subTypes), - rawNewInfos(rawNewInfos), refTest(refTest) {} + : propagatedInfos(propagatedInfos), refTestInfos(refTestInfos), + subTypes(subTypes), refTest(refTest) {} template std::optional getRelevantHeapType(T* ref) { auto type = ref->type; @@ -210,7 +210,7 @@ struct FunctionOptimizer : public WalkerPass> { // on simply applying a constant. However, we can try to use a ref.test, if // that is allowed.
if (!info.isConstant()) { - if (refTest) { + if (refTest && !ref->type.isExact()) { optimizeUsingRefTest(curr, ref, index); } return; @@ -233,22 +233,6 @@ struct FunctionOptimizer : public WalkerPass> { auto refType = ref->type; auto refHeapType = refType.getHeapType(); - // We only handle immutable fields in this function, as we will be looking - // at |rawNewInfos|. That is, we are trying to see when a type and its - // subtypes have different values (so that we can differentiate between them - // using a ref.test), and those differences are lost in |propagatedInfos|, - // which has propagated to relevant types so that we can do a single check - // to see what value could be there. So we need to use something more - // precise, |rawNewInfos|, which tracks the values written to struct.news, - // where we know the type exactly (unlike with a struct.set). But for that - // reason the field must be immutable, so that it is valid to only look at - // the struct.news. (A more complex flow analysis could do better here, but - // would be far beyond the scope of this pass.) - if (index != StructUtils::DescriptorIndex && - GCTypeUtils::getField(refType, index)->mutable_ == Mutable) { - return; - } - // We seek two possible constant values. For each we track the constant and // the types that have that constant. For example, if we have types A, B, C // and A and B have 42 in their field, and C has 1337, then we'd have this: @@ -283,13 +267,17 @@ struct FunctionOptimizer : public WalkerPass> { return; } - auto iter = rawNewInfos.find({type, Exact}); - if (iter == rawNewInfos.end()) { - // This type has no struct.news, so we can ignore it: it is abstract. + auto iter = refTestInfos.find({type, Exact}); + if (iter == refTestInfos.end()) { + // This type has no allocations, so we can ignore it: it is abstract. return; } auto value = iter->second[index]; + if (!value.hasNoted()) { + // Also abstract and ignorable. 
+ return; + } if (!value.isConstant()) { // The value here is not constant, so give up entirely. fail = true; @@ -409,8 +397,8 @@ struct FunctionOptimizer : public WalkerPass> { private: const PCVStructValuesMap& propagatedInfos; + const PCVStructValuesMap& refTestInfos; const SubTypes& subTypes; - const PCVStructValuesMap& rawNewInfos; const bool refTest; bool changed = false; @@ -492,20 +480,13 @@ struct ConstantFieldPropagation : public Pass { scanner.runOnModuleCode(runner, module); // Combine the data from the functions. - PCVStructValuesMap combinedNewInfos, combinedSetInfos; - functionNewInfos.combineInto(combinedNewInfos); + PCVStructValuesMap combinedSetInfos; + functionNewInfos.combineInto(combinedSetInfos); functionSetInfos.combineInto(combinedSetInfos); BoolStructValuesMap combinedCopyInfos; functionCopyInfos.combineInto(combinedCopyInfos); - // Prepare data we will need later. - SubTypes subTypes(*module); - - // Copy the unpropagated data before we propagate. We use this in precise - // lookups. - auto rawNewInfos = combinedNewInfos; - - // Handle subtyping. |combinedInfo| so far contains data that represents + // Handle subtyping. |combinedSetInfos| so far contains data that represents // each struct.new and struct.set's operation on the struct type used in // that instruction. That is, if we do a struct.set to type T, the value was // noted for type T. But our actual goal is to answer questions about @@ -532,10 +513,11 @@ struct ConstantFieldPropagation : public Pass { // efficient, we therefore propagate information about the possible values // in each field to both subtypes and supertypes. // - // struct.new on the other hand knows exactly what type is being written to, - // and so given a get of $A and a new of $B, the new is relevant for the get - // iff $A is a subtype of $B, so we only need to propagate in one direction - // there, to supertypes. + // Values written in struct.news are equivalent to values written to exact + // references. 
In both cases, the propagation to subtypes will not do + // anything because an exact reference has no non-trivial subtypes. This + // works out because a set of a field of an exact reference (or an + // allocation) cannot ever affect the value read out of a subtype's field. // // An exception to the above are copies. If a field is copied then even // struct.new information cannot be assumed to be precise: @@ -549,36 +531,57 @@ struct ConstantFieldPropagation : public Pass { // foo(A->f0); // These can contain 20, // foo(C->f0); // if the copy read from B. // - // To handle that, copied fields are treated like struct.set ones (by - // copying the struct.new data to struct.set). Note that we must propagate - // copying to subtypes first, as in the example above the struct.new values - // of subtypes must be taken into account (that is, A or a subtype is being - // copied, so we want to do the same thing for B and C as well as A, since - // a copy of A means it could be a copy of B or C). - StructUtils::TypeHierarchyPropagator - boolPropagator(subTypes); - boolPropagator.propagateToSubTypesWithExact(combinedCopyInfos); + // The handling of copies is explained below. + SubTypes subTypes(*module); + StructUtils::TypeHierarchyPropagator propagator( + subTypes); + + // Compute the values without accounting for copies. + PCVStructValuesMap noCopySetInfos = combinedSetInfos; + propagator.propagateToSubTypesWithExact(noCopySetInfos); + propagator.propagateToSuperTypes(noCopySetInfos); + + // Now account for copies. A copy takes a value from any subtype + // of the copy source to any subtype of the copy destination. Since we last + // propagated to supertypes, we know the propagated values increase + // monotonically as you go up the type hierarchy. The propagated value in a + // field therefore overapproximates the values in the corresponding field in + // all the subtypes. So for each copy, we can use the propagated value as + // the copied value. 
Then we will propagate set values again, this time + // including the copied values. We only need to repeat the propagation once; + // if the second propagation discovers greater values in the copied fields, + // it can only be because those greater values were propagated from a + // supertype. In that case, the greater value has also been propagated to + // all subtypes, so repeating the process will not further change anything. + // + // TODO: Track separate sources and destinations of copies rather than + // special-casing copies to self. This would let propagation discover + // greater copied values from unrelated types or even different field + // indices, so we would have to repeatedly propagate taking into account the + // latest discovered copied values until reaching a fixed point. for (auto& [type, copied] : combinedCopyInfos) { - for (Index i = 0; i < copied.size(); i++) { + for (Index i = 0; i < copied.size(); ++i) { if (copied[i]) { - combinedSetInfos[type][i].combine(combinedNewInfos[type][i]); + combinedSetInfos[type][i].combine(noCopySetInfos[type][i]); } } } - StructUtils::TypeHierarchyPropagator propagator( - subTypes); - propagator.propagateToSuperTypes(combinedNewInfos); - propagator.propagateToSuperAndSubTypesWithExact(combinedSetInfos); - - // Combine both sources of information to the final information that gets - // care about. - PCVStructValuesMap combinedInfos = std::move(combinedNewInfos); - combinedSetInfos.combineInto(combinedInfos); + // Propagate the values again, now including values readable by copies. + // RefTest optimization manually checks the values in every subtype to + // make sure they match, so there's no need to propagate values up for that. + // Snapshot the info before propagating up for use in RefTest + // optimization. 
+ PCVStructValuesMap refTestInfos; + propagator.propagateToSubTypesWithExact(combinedSetInfos); + if (refTest) { + refTestInfos = combinedSetInfos; + } + propagator.propagateToSuperTypes(combinedSetInfos); // Optimize. // TODO: Skip this if we cannot optimize anything - FunctionOptimizer(combinedInfos, subTypes, rawNewInfos, refTest) + FunctionOptimizer(combinedSetInfos, refTestInfos, subTypes, refTest) .run(runner, module); } }; diff --git a/test/lit/passes/cfp-reftest.wast b/test/lit/passes/cfp-reftest.wast index f46b211cd5e..4c618c5eeea 100644 --- a/test/lit/passes/cfp-reftest.wast +++ b/test/lit/passes/cfp-reftest.wast @@ -279,18 +279,18 @@ ) ) -;; Almost optimizable, but the field is mutable, so we can't. +;; The field is mutable, but we can still optimize. (module ;; CHECK: (type $struct (sub (struct (field (mut i32))))) (type $struct (sub (struct (mut i32)))) - ;; CHECK: (type $1 (func)) - ;; CHECK: (type $substruct (sub $struct (struct (field (mut i32)) (field f64)))) (type $substruct (sub $struct (struct (mut i32) f64))) + ;; CHECK: (type $2 (func)) + ;; CHECK: (type $3 (func (param (ref null $struct)) (result i32))) - ;; CHECK: (func $create (type $1) + ;; CHECK: (func $create (type $2) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (struct.new $struct ;; CHECK-NEXT: (i32.const 10) @@ -317,6 +317,167 @@ ) ) ;; CHECK: (func $get (type $3) (param $struct (ref null $struct)) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (ref.test (ref $substruct) + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $get (param $struct (ref null $struct)) (result i32) + ;; We can optimize here using a ref.test. + (struct.get $struct 0 + (local.get $struct) + ) + ) +) + +;; No-op sets do not inhibit optimization.
+(module + ;; CHECK: (type $struct (sub (struct (field (mut i32))))) + (type $struct (sub (struct (mut i32)))) + ;; CHECK: (type $substruct (sub $struct (struct (field (mut i32)) (field f64)))) + (type $substruct (sub $struct (struct (mut i32) f64))) + + ;; CHECK: (type $2 (func)) + + ;; CHECK: (type $3 (func (param (ref null (exact $struct)) (ref null $substruct)))) + + ;; CHECK: (type $4 (func (param (ref null $struct)) (result i32))) + + ;; CHECK: (func $create (type $2) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new $struct + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new $substruct + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: (f64.const 3.14159) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $create + (drop + (struct.new $struct + (i32.const 10) + ) + ) + (drop + (struct.new $substruct + (i32.const 20) + (f64.const 3.14159) + ) + ) + ) + + ;; CHECK: (func $sets (type $3) (param $struct-exact (ref null (exact $struct))) (param $substruct (ref null $substruct)) + ;; CHECK-NEXT: (struct.set $struct 0 + ;; CHECK-NEXT: (local.get $struct-exact) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $substruct 0 + ;; CHECK-NEXT: (local.get $substruct) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $sets (param $struct-exact (ref null (exact $struct))) (param $substruct (ref null $substruct)) + (struct.set $struct 0 + (local.get $struct-exact) + (i32.const 10) + ) + (struct.set $substruct 0 + (local.get $substruct) + (i32.const 20) + ) + ) + + ;; CHECK: (func $get (type $4) (param $struct (ref null $struct)) (result i32) + ;; CHECK-NEXT: (select + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (ref.test (ref $substruct) + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $get 
(param $struct (ref null $struct)) (result i32) + ;; We can optimize here using a ref.test. + (struct.get $struct 0 + (local.get $struct) + ) + ) +) + +;; Same as above, except now the set to $struct is inexact so we cannot +;; optimize. +(module + ;; CHECK: (type $struct (sub (struct (field (mut i32))))) + (type $struct (sub (struct (mut i32)))) + ;; CHECK: (type $substruct (sub $struct (struct (field (mut i32)) (field f64)))) + (type $substruct (sub $struct (struct (mut i32) f64))) + + ;; CHECK: (type $2 (func)) + + ;; CHECK: (type $3 (func (param (ref null $struct) (ref null $substruct)))) + + ;; CHECK: (type $4 (func (param (ref null $struct)) (result i32))) + + ;; CHECK: (func $create (type $2) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new $struct + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (struct.new $substruct + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: (f64.const 3.14159) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $create + (drop + (struct.new $struct + (i32.const 10) + ) + ) + (drop + (struct.new $substruct + (i32.const 20) + (f64.const 3.14159) + ) + ) + ) + + ;; CHECK: (func $sets (type $3) (param $struct (ref null $struct)) (param $substruct (ref null $substruct)) + ;; CHECK-NEXT: (struct.set $struct 0 + ;; CHECK-NEXT: (local.get $struct) + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.set $substruct 0 + ;; CHECK-NEXT: (local.get $substruct) + ;; CHECK-NEXT: (i32.const 20) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $sets (param $struct (ref null $struct)) (param $substruct (ref null $substruct)) + (struct.set $struct 0 + (local.get $struct) + (i32.const 10) + ) + (struct.set $substruct 0 + (local.get $substruct) + (i32.const 20) + ) + ) + + ;; CHECK: (func $get (type $4) (param $struct (ref null $struct)) (result i32) ;; CHECK-NEXT: (struct.get $struct 0 ;; CHECK-NEXT: (local.get $struct) ;; CHECK-NEXT: ) diff --git
a/test/lit/passes/cfp.wast b/test/lit/passes/cfp.wast index 4ccdda92a2b..4478ced671f 100644 --- a/test/lit/passes/cfp.wast +++ b/test/lit/passes/cfp.wast @@ -2331,19 +2331,25 @@ ;; CHECK-NEXT: (local $B (ref $B)) ;; CHECK-NEXT: (struct.set $A 0 ;; CHECK-NEXT: (select (result (ref null $A)) - ;; CHECK-NEXT: (ref.null none) ;; CHECK-NEXT: (block (result (ref null $A)) - ;; CHECK-NEXT: (local.tee $B - ;; CHECK-NEXT: (struct.new $B - ;; CHECK-NEXT: (i32.const 20) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.tee $B + ;; CHECK-NEXT: (struct.new $B + ;; CHECK-NEXT: (i32.const 20) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (struct.get $A 0 - ;; CHECK-NEXT: (struct.new $A - ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: (select (result (ref null $A)) + ;; CHECK-NEXT: (block (result (ref null $A)) + ;; CHECK-NEXT: (ref.null none) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (struct.new $A + ;; CHECK-NEXT: (i32.const 10) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 0) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -2360,22 +2366,29 @@ ;; that we track the copied value even though the copy is on $A but it ;; affects $B. (struct.set $A 0 - ;; This select is used to keep the type that reaches the struct.set $A, - ;; and not $B, so it looks like a perfect copy of $A->$A. + ;; Use selects to make sure the types reaching the set and get are not more + ;; precise than (ref null $A). This will look like a perfect copy of + ;; $A->$A. 
(select (result (ref null $A)) - (ref.null none) (block (result (ref null $A)) - (local.tee $B - (struct.new $B - (i32.const 20) - ) + (ref.null none) + ) + (local.tee $B + (struct.new $B + (i32.const 20) ) ) (i32.const 0) ) (struct.get $A 0 - (struct.new $A - (i32.const 10) + (select (result (ref null $A)) + (block (result (ref null $A)) + (ref.null none) + ) + (struct.new $A + (i32.const 10) + ) + (i32.const 0) ) ) ) @@ -2655,18 +2668,33 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (struct.get $B 0 - ;; CHECK-NEXT: (local.get $B) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $B) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 20) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (struct.get $A 0 - ;; CHECK-NEXT: (local.get $A-exact) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $A-exact) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 10) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (struct.get $B 0 - ;; CHECK-NEXT: (local.get $B-exact) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $B-exact) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 20) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -2698,7 +2726,7 @@ (local.get $B-exact) (i32.const 20) ) - ;; We should be able to optimize an inexact $B, but not $A TODO. + ;; We should be able to optimize an inexact $B, but not $A. 
(drop (struct.get $A 0 (local.get $A) @@ -2804,8 +2832,13 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (struct.get $C 0 - ;; CHECK-NEXT: (local.get $C) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $C) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 30) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -2820,7 +2853,7 @@ (local.get $B) ) ) - ;; This should be optimizable TODO. + ;; This should be optimizable. (drop (struct.get $C 0 (local.get $C) @@ -2830,8 +2863,13 @@ ;; CHECK: (func $exact-gets (type $6) (param $A-exact (ref (exact $A))) (param $B-exact (ref (exact $B))) (param $C-exact (ref (exact $C))) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (struct.get $A 0 - ;; CHECK-NEXT: (local.get $A-exact) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $A-exact) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 10) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop @@ -2840,8 +2878,13 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (struct.get $C 0 - ;; CHECK-NEXT: (local.get $C-exact) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $C-exact) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 30) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -2849,7 +2892,7 @@ (param $B-exact (ref (exact $B))) (param $C-exact (ref (exact $C))) (drop - ;; This should be optimizable TODO. + ;; This should be optimizable. (struct.get $A 0 (local.get $A-exact) ) @@ -2860,7 +2903,7 @@ (local.get $B-exact) ) ) - ;; This should be optimizable TODO. + ;; This should be optimizable. 
(drop (struct.get $C 0 (local.get $C-exact) @@ -3030,8 +3073,13 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (struct.get $B2 0 - ;; CHECK-NEXT: (local.get $B2) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $B2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 20) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -3073,8 +3121,7 @@ (local.get $B1) ) ) - ;; The copy can't refer to a $B2, so we can optimize here. TODO (but GUFA - ;; can do this) + ;; The copy can't refer to a $B2, so we can optimize here. (drop (struct.get $B2 0 (local.get $B2) @@ -3132,8 +3179,13 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (struct.get $B1 0 - ;; CHECK-NEXT: (local.get $B1) + ;; CHECK-NEXT: (block (result i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.as_non_null + ;; CHECK-NEXT: (local.get $B1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 10) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop @@ -3179,8 +3231,7 @@ (local.get $A) ) ) - ;; The copy can't refer to a $B1, so we can optimize here. TODO (but GUFA - ;; can do this) + ;; The copy can't refer to a $B1, so we can optimize here. 
(drop (struct.get $B1 0 (local.get $B1) @@ -3287,16 +3338,9 @@ ;; CHECK: (func $get-B (type $9) (param $B (ref null $B)) (result (ref null $Y)) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.as_non_null - ;; CHECK-NEXT: (local.get $B) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (block - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (global.get $global) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: (local.get $B) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) ;; CHECK-NEXT: ) (func $get-B (param $B (ref null $B)) (result (ref null $Y)) ;; This should not be optimized to a global.get: no $B is created, and we From 7d0445e3351d7e40dcec0584df2c51c8a087e5e3 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 9 Sep 2025 08:24:45 -0700 Subject: [PATCH 07/13] Comment about convenience subscripting. Co-authored-by: Alon Zakai --- src/ir/struct-utils.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ir/struct-utils.h b/src/ir/struct-utils.h index 77ed41fdbea..b8e50fc1f0f 100644 --- a/src/ir/struct-utils.h +++ b/src/ir/struct-utils.h @@ -105,6 +105,7 @@ struct StructValuesMap return values; } + // Convenience operator for inexact queries. StructValues& operator[](HeapType type) { return (*this)[{type, Inexact}]; } From 89751b1ca691f0dd3ee35c1bfc9e056a2e426088 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 9 Sep 2025 09:34:28 -0700 Subject: [PATCH 08/13] lambda --- src/ir/struct-utils.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/ir/struct-utils.h b/src/ir/struct-utils.h index b8e50fc1f0f..4b328f97ddb 100644 --- a/src/ir/struct-utils.h +++ b/src/ir/struct-utils.h @@ -456,14 +456,7 @@ template class TypeHierarchyPropagator { // Propagate shared fields to the subtypes, which may just be the exact // version of the same type. 
auto numFields = type.getStruct().fields.size(); - std::vector> subs; - if (includeExact) { - subs.emplace_back(type, Exact); - } - for (auto subType : subTypes.getImmediateSubTypes(type)) { - subs.emplace_back(subType, Inexact); - } - for (auto sub : subs) { + auto handleSubtype = [&](std::pair sub) { auto& subInfos = combinedInfos[sub]; for (Index i = 0; i < numFields; i++) { if (subInfos[i].combine(infos[i])) { @@ -474,6 +467,12 @@ template class TypeHierarchyPropagator { if (subInfos.desc.combine(infos.desc)) { work.push(sub); } + }; + if (includeExact) { + handleSubtype({type, Exact}); + } + for (auto subType : subTypes.getImmediateSubTypes(type)) { + handleSubtype({subType, Inexact}); } } } From 49dcfc7c244e4a415286e6879705f12a8cdd9de6 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 9 Sep 2025 09:37:25 -0700 Subject: [PATCH 09/13] comment on exact propagation --- src/ir/struct-utils.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ir/struct-utils.h b/src/ir/struct-utils.h index 4b328f97ddb..d89041fac24 100644 --- a/src/ir/struct-utils.h +++ b/src/ir/struct-utils.h @@ -415,6 +415,8 @@ template class TypeHierarchyPropagator { } private: + // `includeExact` is whether to propagate to exact subtypes only because there + // are no exact supertypes. void propagate(StructValuesMap& combinedInfos, bool toSubTypes, bool toSuperTypes, From 640307378d1fa7e8802033776472ebe60ffc8014 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 10 Sep 2025 15:06:53 -0700 Subject: [PATCH 10/13] update comment --- src/ir/struct-utils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ir/struct-utils.h b/src/ir/struct-utils.h index d89041fac24..347d9700a52 100644 --- a/src/ir/struct-utils.h +++ b/src/ir/struct-utils.h @@ -415,8 +415,8 @@ template class TypeHierarchyPropagator { } private: - // `includeExact` is whether to propagate to exact subtypes only because there - // are no exact supertypes. + // N.B. 
`includeExact` is only whether to propagate to exact subtypes because + // there are no exact supertypes. void propagate(StructValuesMap& combinedInfos, bool toSubTypes, bool toSuperTypes, From 1d61606f65990346011dc468b5fbf116caac99ae Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 12 Sep 2025 14:39:41 -0700 Subject: [PATCH 11/13] remove complexity of avoiding exactness --- src/ir/struct-utils.h | 26 +++++-------------------- src/passes/ConstantFieldPropagation.cpp | 4 ++-- src/passes/GlobalTypeOptimization.cpp | 6 ++++-- src/passes/TypeRefining.cpp | 21 ++++++++++++-------- 4 files changed, 24 insertions(+), 33 deletions(-) diff --git a/src/ir/struct-utils.h b/src/ir/struct-utils.h index 347d9700a52..414a8767d35 100644 --- a/src/ir/struct-utils.h +++ b/src/ir/struct-utils.h @@ -105,11 +105,6 @@ struct StructValuesMap return values; } - // Convenience operator for inexact queries. - StructValues& operator[](HeapType type) { - return (*this)[{type, Inexact}]; - } - void combineInto(StructValuesMap& combinedInfos) const { for (auto& [type, info] : *this) { for (Index i = 0; i < info.size(); i++) { @@ -385,19 +380,13 @@ template class TypeHierarchyPropagator { // Propagate given a StructValuesMap, which means we need to take into // account fields. 
void propagateToSuperTypes(StructValuesMap& infos) { - propagate(infos, false, true, true); + propagate(infos, false, true); } void propagateToSubTypes(StructValuesMap& infos) { - propagate(infos, true, false, false); - } - void propagateToSubTypesWithExact(StructValuesMap& infos) { - propagate(infos, true, false, true); + propagate(infos, true, false); } void propagateToSuperAndSubTypes(StructValuesMap& infos) { - propagate(infos, true, true, false); - } - void propagateToSuperAndSubTypesWithExact(StructValuesMap& infos) { - propagate(infos, true, true, true); + propagate(infos, true, true); } // Propagate on a simpler map of structs and infos (that is, not using @@ -415,12 +404,9 @@ template class TypeHierarchyPropagator { } private: - // N.B. `includeExact` is only whether to propagate to exact subtypes because - // there are no exact supertypes. void propagate(StructValuesMap& combinedInfos, bool toSubTypes, - bool toSuperTypes, - bool includeExact) { + bool toSuperTypes) { UniqueDeferredQueue> work; for (auto& [ht, _] : combinedInfos) { work.push(ht); @@ -470,9 +456,7 @@ template class TypeHierarchyPropagator { work.push(sub); } }; - if (includeExact) { - handleSubtype({type, Exact}); - } + handleSubtype({type, Exact}); for (auto subType : subTypes.getImmediateSubTypes(type)) { handleSubtype({subType, Inexact}); } diff --git a/src/passes/ConstantFieldPropagation.cpp b/src/passes/ConstantFieldPropagation.cpp index e1c120685bf..d0d7f2938bd 100644 --- a/src/passes/ConstantFieldPropagation.cpp +++ b/src/passes/ConstantFieldPropagation.cpp @@ -557,7 +557,7 @@ struct ConstantFieldPropagation : public Pass { // a copy of A means it could be a copy of B or C). 
StructUtils::TypeHierarchyPropagator boolPropagator(subTypes); - boolPropagator.propagateToSubTypesWithExact(combinedCopyInfos); + boolPropagator.propagateToSubTypes(combinedCopyInfos); for (auto& [type, copied] : combinedCopyInfos) { for (Index i = 0; i < copied.size(); i++) { if (copied[i]) { @@ -569,7 +569,7 @@ struct ConstantFieldPropagation : public Pass { StructUtils::TypeHierarchyPropagator propagator( subTypes); propagator.propagateToSuperTypes(combinedNewInfos); - propagator.propagateToSuperAndSubTypesWithExact(combinedSetInfos); + propagator.propagateToSuperAndSubTypes(combinedSetInfos); // Combine both sources of information to the final information that gets // care about. diff --git a/src/passes/GlobalTypeOptimization.cpp b/src/passes/GlobalTypeOptimization.cpp index bf0dc74fdfd..546c8c0a280 100644 --- a/src/passes/GlobalTypeOptimization.cpp +++ b/src/passes/GlobalTypeOptimization.cpp @@ -210,9 +210,9 @@ struct GlobalTypeOptimization : public Pass { SubTypes subTypes(*module); StructUtils::TypeHierarchyPropagator propagator(subTypes); auto dataFromSubsAndSupersMap = combinedSetGetInfos; - propagator.propagateToSuperAndSubTypesWithExact(dataFromSubsAndSupersMap); + propagator.propagateToSuperAndSubTypes(dataFromSubsAndSupersMap); auto dataFromSupersMap = std::move(combinedSetGetInfos); - propagator.propagateToSubTypesWithExact(dataFromSupersMap); + propagator.propagateToSubTypes(dataFromSupersMap); // Find the public types, which we must not modify. auto publicTypes = ModuleUtils::getPublicHeapTypes(*module); @@ -229,6 +229,8 @@ struct GlobalTypeOptimization : public Pass { continue; } auto& fields = type.getStruct().fields; + // Use the exact entry because information from the inexact entry will + // have been propagated down into it but not vice versa. 
auto ht = std::make_pair(type, Exact); auto& dataFromSubsAndSupers = dataFromSubsAndSupersMap[ht]; auto& dataFromSupers = dataFromSupersMap[ht]; diff --git a/src/passes/TypeRefining.cpp b/src/passes/TypeRefining.cpp index 0afeff28404..2db26b77a3d 100644 --- a/src/passes/TypeRefining.cpp +++ b/src/passes/TypeRefining.cpp @@ -193,7 +193,8 @@ struct TypeRefining : public Pass { for (auto type : allTypes) { if (type.isStruct()) { auto& fields = type.getStruct().fields; - auto& infos = finalInfos[type]; + // Update the inexact entry because that's what we will query later. + auto& infos = finalInfos[{type, Inexact}]; for (Index i = 0; i < fields.size(); i++) { auto gufaType = oracle.getContents(DataLocation{type, i}).getType(); // Do not introduce new exact fields that might requires invalid @@ -223,7 +224,7 @@ struct TypeRefining : public Pass { } auto type = structNew->type.getHeapType(); - auto& infos = finalInfos[type]; + auto& infos = finalInfos[{type, Inexact}]; auto& fields = type.getStruct().fields; for (Index i = 0; i < fields.size(); i++) { // We are in a situation like this: @@ -287,7 +288,9 @@ struct TypeRefining : public Pass { auto& fields = type.getStruct().fields; for (Index i = 0; i < fields.size(); i++) { auto oldType = fields[i].type; - auto& info = finalInfos[type][i]; + // Use inexact because exact info will have been propagated up to + // inexact entries but not necessarily vice versa. 
+ auto& info = finalInfos[{type, Inexact}][i]; if (!info.noted()) { info = LUBFinder(oldType); } @@ -301,11 +304,11 @@ struct TypeRefining : public Pass { // public, unchanged since we cannot optimize it Type newSuperType; if (!publicTypesSet.count(*super)) { - newSuperType = finalInfos[*super][i].getLUB(); + newSuperType = finalInfos[{*super, Inexact}][i].getLUB(); } else { newSuperType = superFields[i].type; } - auto& info = finalInfos[type][i]; + auto& info = finalInfos[{type, Inexact}][i]; auto newType = info.getLUB(); if (!Type::isSubType(newType, newSuperType)) { // To ensure we are a subtype of the super's field, simply copy that @@ -340,7 +343,7 @@ struct TypeRefining : public Pass { // After all those decisions, see if we found anything to optimize. for (Index i = 0; i < fields.size(); i++) { auto oldType = fields[i].type; - auto& lub = finalInfos[type][i]; + auto& lub = finalInfos[{type, Inexact}][i]; auto newType = lub.getLUB(); if (newType != oldType) { canOptimize = true; @@ -384,7 +387,8 @@ struct TypeRefining : public Pass { Type newFieldType; if (!curr->ref->type.isNull()) { auto oldType = curr->ref->type.getHeapType(); - newFieldType = parent.finalInfos[oldType][curr->index].getLUB(); + newFieldType = + parent.finalInfos[{oldType, Inexact}][curr->index].getLUB(); } if (curr->ref->type.isNull() || newFieldType == Type::unreachable || @@ -449,7 +453,8 @@ struct TypeRefining : public Pass { if (!oldType.isRef()) { continue; } - auto newType = parent.finalInfos[oldStructType][i].getLUB(); + auto newType = + parent.finalInfos[{oldStructType, Inexact}][i].getLUB(); newFields[i].type = getTempType(newType); } } From 0bc2907068fc8771c707ddf747170aa399643d6c Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Thu, 18 Sep 2025 18:17:17 -0700 Subject: [PATCH 12/13] comment on exact ref.test --- src/passes/ConstantFieldPropagation.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/passes/ConstantFieldPropagation.cpp 
b/src/passes/ConstantFieldPropagation.cpp index 8a145745e95..15e402ac8af 100644 --- a/src/passes/ConstantFieldPropagation.cpp +++ b/src/passes/ConstantFieldPropagation.cpp @@ -210,6 +210,8 @@ struct FunctionOptimizer : public WalkerPass> { // on simply applying a constant. However, we can try to use a ref.test, if // that is allowed. if (!info.isConstant()) { + // Note that if the reference is exact, we never need to use a ref.test + // because there will not be multiple subtypes to select between. if (refTest && !ref->type.isExact()) { optimizeUsingRefTest(curr, ref, index); } From a8517540c66efa963be450d690b120f26a4f2867 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 19 Sep 2025 09:06:09 -0700 Subject: [PATCH 13/13] Remove stale comments --- test/lit/passes/cfp-reftest.wast | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/lit/passes/cfp-reftest.wast b/test/lit/passes/cfp-reftest.wast index 4c618c5eeea..404f3147234 100644 --- a/test/lit/passes/cfp-reftest.wast +++ b/test/lit/passes/cfp-reftest.wast @@ -328,7 +328,6 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $get (param $struct (ref null $struct)) (result i32) - ;; We cannot optimize here. (struct.get $struct 0 (local.get $struct) ) @@ -408,7 +407,6 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $get (param $struct (ref null $struct)) (result i32) - ;; We cannot optimize here. (struct.get $struct 0 (local.get $struct) )