WebAssembly · tlively · Sep 19, 2025 · Sep 6, 2025 · Sep 8, 2025 · Sep 8, 2025
diff --git a/src/passes/ConstantFieldPropagation.cpp b/src/passes/ConstantFieldPropagation.cpp
@@ -91,15 +91,15 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
   // subtyping and new infos (information about struct.news).
   std::unique_ptr<Pass> create() override {
     return std::make_unique<FunctionOptimizer>(
-      propagatedInfos, subTypes, rawNewInfos, refTest);
+      propagatedInfos, refTestInfos, subTypes, refTest);
   }
 
   FunctionOptimizer(const PCVStructValuesMap& propagatedInfos,
+                    const PCVStructValuesMap& refTestInfos,
                     const SubTypes& subTypes,
-                    const PCVStructValuesMap& rawNewInfos,
                     bool refTest)
-    : propagatedInfos(propagatedInfos), subTypes(subTypes),
-      rawNewInfos(rawNewInfos), refTest(refTest) {}
+    : propagatedInfos(propagatedInfos), refTestInfos(refTestInfos),
+      subTypes(subTypes), refTest(refTest) {}
 
   template<typename T> std::optional<HeapType> getRelevantHeapType(T* ref) {
     auto type = ref->type;
@@ -210,7 +210,9 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
     // on simply applying a constant. However, we can try to use a ref.test, if
     // that is allowed.
     if (!info.isConstant()) {
-      if (refTest) {
+      // Note that if the reference is exact, we never need to use a ref.test
+      // because there will not be multiple subtypes to select between.
+      if (refTest && !ref->type.isExact()) {
         optimizeUsingRefTest(curr, ref, index);
       }
       return;
@@ -233,22 +235,6 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
     auto refType = ref->type;
     auto refHeapType = refType.getHeapType();
 
-    // We only handle immutable fields in this function, as we will be looking
-    // at |rawNewInfos|. That is, we are trying to see when a type and its
-    // subtypes have different values (so that we can differentiate between them
-    // using a ref.test), and those differences are lost in |propagatedInfos|,
-    // which has propagated to relevant types so that we can do a single check
-    // to see what value could be there. So we need to use something more
-    // precise, |rawNewInfos|, which tracks the values written to struct.news,
-    // where we know the type exactly (unlike with a struct.set). But for that
-    // reason the field must be immutable, so that it is valid to only look at
-    // the struct.news. (A more complex flow analysis could do better here, but
-    // would be far beyond the scope of this pass.)
-    if (index != StructUtils::DescriptorIndex &&
-        GCTypeUtils::getField(refType, index)->mutable_ == Mutable) {
-      return;
-    }
-
     // We seek two possible constant values. For each we track the constant and
     // the types that have that constant. For example, if we have types A, B, C
     // and A and B have 42 in their field, and C has 1337, then we'd have this:
@@ -283,13 +269,17 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
         return;
       }
 
-      auto iter = rawNewInfos.find({type, Exact});
-      if (iter == rawNewInfos.end()) {
-        // This type has no struct.news, so we can ignore it: it is abstract.
+      auto iter = refTestInfos.find({type, Exact});
+      if (iter == refTestInfos.end()) {
+        // This type has no allocations, so we can ignore it: it is abstract.
         return;
       }
 
       auto value = iter->second[index];
+      if (!value.hasNoted()) {
+        // Also abstract and ignorable.
+        return;
+      }
       if (!value.isConstant()) {
         // The value here is not constant, so give up entirely.
         fail = true;
@@ -409,8 +399,8 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
 
 private:
   const PCVStructValuesMap& propagatedInfos;
+  const PCVStructValuesMap& refTestInfos;
   const SubTypes& subTypes;
-  const PCVStructValuesMap& rawNewInfos;
   const bool refTest;
 
   bool changed = false;
@@ -492,20 +482,13 @@ struct ConstantFieldPropagation : public Pass {
     scanner.runOnModuleCode(runner, module);
 
     // Combine the data from the functions.
-    PCVStructValuesMap combinedNewInfos, combinedSetInfos;
-    functionNewInfos.combineInto(combinedNewInfos);
+    PCVStructValuesMap combinedSetInfos;
+    functionNewInfos.combineInto(combinedSetInfos);
     functionSetInfos.combineInto(combinedSetInfos);
     BoolStructValuesMap combinedCopyInfos;
     functionCopyInfos.combineInto(combinedCopyInfos);
 
-    // Prepare data we will need later.
-    SubTypes subTypes(*module);
-
-    // Copy the unpropagated data before we propagate. We use this in precise
-    // lookups.
-    auto rawNewInfos = combinedNewInfos;
-
-    // Handle subtyping. |combinedInfo| so far contains data that represents
+    // Handle subtyping. |combinedSetInfos| so far contains data that represents
     // each struct.new and struct.set's operation on the struct type used in
     // that instruction. That is, if we do a struct.set to type T, the value was
     // noted for type T. But our actual goal is to answer questions about
@@ -532,10 +515,11 @@ struct ConstantFieldPropagation : public Pass {
     // efficient, we therefore propagate information about the possible values
     // in each field to both subtypes and supertypes.
     //
-    // struct.new on the other hand knows exactly what type is being written to,
-    // and so given a get of $A and a new of $B, the new is relevant for the get
-    // iff $A is a subtype of $B, so we only need to propagate in one direction
-    // there, to supertypes.
+    // Values written in struct.news are equivalent to values written to exact
+    // references. In both cases, the propagation to subtypes will not do
+    // anything because an exact reference has no non-trivial subtypes. This
+    // works out because a set of a field of an exact reference (or an
+    // allocation) cannot ever affect the value read out of a subtype's field.
     //
     // An exception to the above are copies. If a field is copied then even
     // struct.new information cannot be assumed to be precise:
@@ -549,36 +533,57 @@ struct ConstantFieldPropagation : public Pass {
     //   foo(A->f0);      // These can contain 20,
     //   foo(C->f0);      // if the copy read from B.
     //
-    // To handle that, copied fields are treated like struct.set ones (by
-    // copying the struct.new data to struct.set). Note that we must propagate
-    // copying to subtypes first, as in the example above the struct.new values
-    // of subtypes must be taken into account (that is, A or a subtype is being
-    // copied, so we want to do the same thing for B and C as well as A, since
-    // a copy of A means it could be a copy of B or C).
-    StructUtils::TypeHierarchyPropagator<StructUtils::CombinableBool>
-      boolPropagator(subTypes);
-    boolPropagator.propagateToSubTypes(combinedCopyInfos);
+    // The handling of copies is explained below.
+    SubTypes subTypes(*module);
+    StructUtils::TypeHierarchyPropagator<PossibleConstantValues> propagator(
+      subTypes);
+
+    // Compute the values without accounting for copies.
+    PCVStructValuesMap noCopySetInfos = combinedSetInfos;
+    propagator.propagateToSubTypes(noCopySetInfos);
+    propagator.propagateToSuperTypes(noCopySetInfos);
+
+    // Now account for copies. A copy takes a value from any subtype
+    // of the copy source to any subtype of the copy destination. Since we last
+    // propagated to supertypes, we know the propagated values increase
+    // monotonically as you go up the type hierarchy. The propagated value in a
+    // field therefore overapproximates the values in the corresponding field in
+    // all the subtypes. So for each copy, we can use the propagated value as
+    // the copied value. Then we will propagate set values again, this time
+    // including the copied values. We only need to repeat the propagation once;
+    // if the second propagation discovers greater values in the copied fields,
+    // it can only be because those greater values were propagated from a
+    // supertype. In that case, the greater value has also been propagated to
+    // all subtypes, so repeating the process will not further change anything.
+    //
+    // TODO: Track separate sources and destinations of copies rather than
+    // special-casing copies to self. This would let propagation discover
+    // greater copied values from unrelated types or even different field
+    // indices, so we would have to repeatedly propagate taking into account the
+    // latest discovered copied values until reaching a fixed point.
     for (auto& [type, copied] : combinedCopyInfos) {
-      for (Index i = 0; i < copied.size(); i++) {
+      for (Index i = 0; i < copied.size(); ++i) {
         if (copied[i]) {
-          combinedSetInfos[type][i].combine(combinedNewInfos[type][i]);
+          combinedSetInfos[type][i].combine(noCopySetInfos[type][i]);
         }
       }
     }
 
-    StructUtils::TypeHierarchyPropagator<PossibleConstantValues> propagator(
-      subTypes);
-    propagator.propagateToSuperTypes(combinedNewInfos);
-    propagator.propagateToSuperAndSubTypes(combinedSetInfos);
-
-    // Combine both sources of information to the final information that gets
-    // care about.
-    PCVStructValuesMap combinedInfos = std::move(combinedNewInfos);
-    combinedSetInfos.combineInto(combinedInfos);
+    // Propagate the values again, now including values readable by copies.
+    // RefTest optimization manually checks the values in every subtype to
+    // make sure they match, so there's no need to propagate values up for that.
+    // Snapshot the info before propagating up for use in RefTest
+    // optimization.
+    PCVStructValuesMap refTestInfos;
+    propagator.propagateToSubTypes(combinedSetInfos);
+    if (refTest) {
+      refTestInfos = combinedSetInfos;
+    }
+    propagator.propagateToSuperTypes(combinedSetInfos);
 
     // Optimize.
     // TODO: Skip this if we cannot optimize anything
-    FunctionOptimizer(combinedInfos, subTypes, rawNewInfos, refTest)
+    FunctionOptimizer(combinedSetInfos, refTestInfos, subTypes, refTest)
       .run(runner, module);
   }
 };

diff --git a/test/lit/passes/cfp-reftest.wast b/test/lit/passes/cfp-reftest.wast
@@ -279,18 +279,18 @@
   )
 )
 
-;; Almost optimizable, but the field is mutable, so we can't.
+;; The field is mutable, but we can still optimize.
 (module
   ;; CHECK:      (type $struct (sub (struct (field (mut i32)))))
   (type $struct (sub (struct (mut i32))))
-  ;; CHECK:      (type $1 (func))
-
   ;; CHECK:      (type $substruct (sub $struct (struct (field (mut i32)) (field f64))))
   (type $substruct (sub $struct (struct (mut i32) f64)))
 
+  ;; CHECK:      (type $2 (func))
+
   ;; CHECK:      (type $3 (func (param (ref null $struct)) (result i32)))
 
-  ;; CHECK:      (func $create (type $1)
+  ;; CHECK:      (func $create (type $2)
   ;; CHECK-NEXT:  (drop
   ;; CHECK-NEXT:   (struct.new $struct
   ;; CHECK-NEXT:    (i32.const 10)
@@ -317,6 +317,165 @@
     )
   )
   ;; CHECK:      (func $get (type $3) (param $struct (ref null $struct)) (result i32)
+  ;; CHECK-NEXT:  (select
+  ;; CHECK-NEXT:   (i32.const 20)
+  ;; CHECK-NEXT:   (i32.const 10)
+  ;; CHECK-NEXT:   (ref.test (ref $substruct)
+  ;; CHECK-NEXT:    (ref.as_non_null
+  ;; CHECK-NEXT:     (local.get $struct)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $get (param $struct (ref null $struct)) (result i32)
+    (struct.get $struct 0
+      (local.get $struct)
+    )
+  )
+)
+
+;; No-op sets do not inhibit optimization.
+(module
+  ;; CHECK:      (type $struct (sub (struct (field (mut i32)))))
+  (type $struct (sub (struct (mut i32))))
+  ;; CHECK:      (type $substruct (sub $struct (struct (field (mut i32)) (field f64))))
+  (type $substruct (sub $struct (struct (mut i32) f64)))
+
+  ;; CHECK:      (type $2 (func))
+
+  ;; CHECK:      (type $3 (func (param (ref null (exact $struct)) (ref null $substruct))))
+
+  ;; CHECK:      (type $4 (func (param (ref null $struct)) (result i32)))
+
+  ;; CHECK:      (func $create (type $2)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.new $struct
+  ;; CHECK-NEXT:    (i32.const 10)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.new $substruct
+  ;; CHECK-NEXT:    (i32.const 20)
+  ;; CHECK-NEXT:    (f64.const 3.14159)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $create
+    (drop
+      (struct.new $struct
+        (i32.const 10)
+      )
+    )
+    (drop
+      (struct.new $substruct
+        (i32.const 20)
+        (f64.const 3.14159)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $sets (type $3) (param $struct-exact (ref null (exact $struct))) (param $substruct (ref null $substruct))
+  ;; CHECK-NEXT:  (struct.set $struct 0
+  ;; CHECK-NEXT:   (local.get $struct-exact)
+  ;; CHECK-NEXT:   (i32.const 10)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (struct.set $substruct 0
+  ;; CHECK-NEXT:   (local.get $substruct)
+  ;; CHECK-NEXT:   (i32.const 20)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $sets (param $struct-exact (ref null (exact $struct))) (param $substruct (ref null $substruct))
+    (struct.set $struct 0
+      (local.get $struct-exact)
+      (i32.const 10)
+    )
+    (struct.set $substruct 0
+      (local.get $substruct)
+      (i32.const 20)
+    )
+  )
+
+  ;; CHECK:      (func $get (type $4) (param $struct (ref null $struct)) (result i32)
+  ;; CHECK-NEXT:  (select
+  ;; CHECK-NEXT:   (i32.const 20)
+  ;; CHECK-NEXT:   (i32.const 10)
+  ;; CHECK-NEXT:   (ref.test (ref $substruct)
+  ;; CHECK-NEXT:    (ref.as_non_null
+  ;; CHECK-NEXT:     (local.get $struct)
+  ;; CHECK-NEXT:    )
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $get (param $struct (ref null $struct)) (result i32)
+    (struct.get $struct 0
+      (local.get $struct)
+    )
+  )
+)
+
+;; Same as above, except now the set to $struct is inexact so we cannot
+;; optimize.
+(module
+  ;; CHECK:      (type $struct (sub (struct (field (mut i32)))))
+  (type $struct (sub (struct (mut i32))))
+  ;; CHECK:      (type $substruct (sub $struct (struct (field (mut i32)) (field f64))))
+  (type $substruct (sub $struct (struct (mut i32) f64)))
+
+  ;; CHECK:      (type $2 (func))
+
+  ;; CHECK:      (type $3 (func (param (ref null $struct) (ref null $substruct))))
+
+  ;; CHECK:      (type $4 (func (param (ref null $struct)) (result i32)))
+
+  ;; CHECK:      (func $create (type $2)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.new $struct
+  ;; CHECK-NEXT:    (i32.const 10)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.new $substruct
+  ;; CHECK-NEXT:    (i32.const 20)
+  ;; CHECK-NEXT:    (f64.const 3.14159)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $create
+    (drop
+      (struct.new $struct
+        (i32.const 10)
+      )
+    )
+    (drop
+      (struct.new $substruct
+        (i32.const 20)
+        (f64.const 3.14159)
+      )
+    )
+  )
+
+  ;; CHECK:      (func $sets (type $3) (param $struct (ref null $struct)) (param $substruct (ref null $substruct))
+  ;; CHECK-NEXT:  (struct.set $struct 0
+  ;; CHECK-NEXT:   (local.get $struct)
+  ;; CHECK-NEXT:   (i32.const 10)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (struct.set $substruct 0
+  ;; CHECK-NEXT:   (local.get $substruct)
+  ;; CHECK-NEXT:   (i32.const 20)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $sets (param $struct (ref null $struct)) (param $substruct (ref null $substruct))
+    (struct.set $struct 0
+      (local.get $struct)
+      (i32.const 10)
+    )
+    (struct.set $substruct 0
+      (local.get $substruct)
+      (i32.const 20)
+    )
+  )
+
+  ;; CHECK:      (func $get (type $4) (param $struct (ref null $struct)) (result i32)
   ;; CHECK-NEXT:  (struct.get $struct 0
   ;; CHECK-NEXT:   (local.get $struct)
   ;; CHECK-NEXT:  )